From 8d00d48c1aa472f763406f77d7f11396ec5f193f Mon Sep 17 00:00:00 2001 From: Ross John Hunter <ross.john.hunter@cern.ch> Date: Sat, 17 Feb 2024 08:42:54 +0100 Subject: [PATCH] [RTA/DPA BW TESTS]: Give the bandwidth test html pages a makeover Co-authored-by: Luke Grazette <l.grazette@warwick.ac.uk> --- python/MooreTests/calculate_stream_overlap.py | 83 +- python/MooreTests/combine_rate_output.py | 54 +- python/MooreTests/line-and-stream-rates.py | 332 ++++--- python/MooreTests/make_bandwidth_test_page.py | 925 +++++++++--------- python/PRConfig/bandwidth_helpers.py | 77 ++ .../benchmark-scripts/Moore_bandwidth_test.sh | 78 +- 6 files changed, 846 insertions(+), 703 deletions(-) diff --git a/python/MooreTests/calculate_stream_overlap.py b/python/MooreTests/calculate_stream_overlap.py index 86fa7049..777b342e 100755 --- a/python/MooreTests/calculate_stream_overlap.py +++ b/python/MooreTests/calculate_stream_overlap.py @@ -30,23 +30,21 @@ def get_all_event_numbers(args): return ret -def get_event_number_matrix(event_numbers_by_stream): +def get_event_number_matrix(event_numbers): all_event_numbers = set([ - evt_no for evt_no_list in event_numbers_by_stream.values() - for evt_no in evt_no_list + event_info for event_info_list in event_numbers.values() + for event_info in event_info_list ]) print( - f"Found {len(all_event_numbers)} unique event numbers across {len(event_numbers_by_stream.keys())} streams." + f"Found {len(all_event_numbers)} unique event numbers across {len(event_numbers.keys())} categories/streams." ) df = pd.DataFrame( - False, - index=list(all_event_numbers), - columns=event_numbers_by_stream.keys()) - for stream, evt_no_list in event_numbers_by_stream.items(): - for evt_no in evt_no_list: - df[stream][evt_no] = True + False, index=list(all_event_numbers), columns=event_numbers.keys()) + for stream, event_info_list in event_numbers.items(): + for event_info in event_info_list: + df[stream][event_info] = True return df @@ -84,44 +82,61 @@ def save(df, htmlpath): def main(): parser = argparse.ArgumentParser() - parser.add_argument( - '-p', - '--process', - type=str, - help='Compute for Hlt2 or Sprucing lines', - choices=['hlt2', 'spruce'], - required=True) - parser.add_argument( - '--stream-config', - type=str, - help='Choose production or per-WG stream configuration', - choices=['production', 'wg'], - required=True) - parser.add_argument('--streams', nargs='+', type=str, required=True) + subparsers = parser.add_subparsers(help='Mode of execution', dest='mode') + inter_stream_parser = subparsers.add_parser("inter_stream") + inter_stream_parser.add_argument( + '--streams', nargs='+', type=str, required=True) + + intra_stream_parser = subparsers.add_parser("intra_stream") + intra_stream_parser.add_argument('--stream', type=str, required=True) + for sp in [inter_stream_parser, intra_stream_parser]: + sp.add_argument( + '-p', + '--process', + type=str, + help='Compute for Hlt2 or Sprucing lines', + choices=['hlt2', 'spruce'], + required=True) + sp.add_argument( + '--stream-config', + type=str, + help='Choose production or per-WG stream configuration', + choices=['production', 'wg'], + required=True) args = parser.parse_args() + fname_helper = FileNameHelper(args.process) - event_numbers = get_all_event_numbers(args) - for stream in args.streams: - print( - f"Found {len(event_numbers[stream])} events for {stream} stream.") + if args.mode == "inter_stream": + similarities_path = fname_helper.jaccard_similarities_path( + args.stream_config) + overlaps_path = 
fname_helper.overlap_matrix_path(args.stream_config) + event_numbers = get_all_event_numbers(args) + elif args.mode == "intra_stream": + similarities_path = fname_helper.intra_stream_jaccard_similarities_path( + args.stream_config, args.stream) + overlaps_path = fname_helper.intra_stream_overlap_matrix_path( + args.stream_config, args.stream) + with open( + fname_helper.intra_stream_event_no_fname( + args.stream_config, args.stream), 'r') as f: + event_numbers = json.load(f) df = get_event_number_matrix(event_numbers) - ofile = fname_helper.jaccard_similarities_path(args.stream_config) sim_matrix = calculate_similarity_matrix(df) print( - f"Calculated similarity matrix. Printing and saving to html at {ofile}." + f"Calculated similarity matrix. Printing and saving to html at {similarities_path}." ) print(sim_matrix) - save(sim_matrix, ofile) + save(sim_matrix, similarities_path) - ofile = fname_helper.overlap_matrix_path(args.stream_config) overlap_matrix = calculate_overlap_matrix(df) print( - f"Calculated overlap matrix. Printing and saving to html at {ofile}.") + f"Calculated overlap matrix. Printing and saving to html at {overlaps_path}." + ) print(overlap_matrix) - save(overlap_matrix, ofile) + save(overlap_matrix, overlaps_path) if __name__ == "__main__": diff --git a/python/MooreTests/combine_rate_output.py b/python/MooreTests/combine_rate_output.py index 8497bbc1..5284e580 100755 --- a/python/MooreTests/combine_rate_output.py +++ b/python/MooreTests/combine_rate_output.py @@ -46,15 +46,15 @@ def _columns_per_line(process): } -def _sorted_df_by_retention(df): +def _sorted_df_by_bandwidth(df): return df.sort_values( - by=['Total Retention (%)'], ascending=False).reset_index(drop=True) + by=['Total Bandwidth (GB/s)'], ascending=False).reset_index(drop=True) -def rates_all_lines(stream_config, fname_helper, process): +def rates_all_lines(stream_config: str, fname_helper: FileNameHelper, + process: str): """Make 1 enormous table with rate/bw info per line for all lines in all streams (i.e. n_rows = n_lines). Saves to .csv and .html. - stream_config is either "production" or "wg" """ frames = [] @@ -67,7 +67,7 @@ def rates_all_lines(stream_config, fname_helper, process): df = pd.concat(frames) df.columns = _columns_per_line(process).keys() - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) df.to_csv(fname_helper.final_rate_table_all_lines_path("csv")) def highlight_vals(val, threshold, color='red'): @@ -93,11 +93,11 @@ def rates_all_lines(stream_config, fname_helper, process): return -def make_rate_table_row_per_line(stream_config, fname_helper, process): +def make_rate_table_row_per_line(stream_config: str, + fname_helper: FileNameHelper, process: str): """ Makes (1 table with rate/bw info per line in the streamed mdf) for all <stream_config> streams (i.e. n_tables = n_streams). Puts them all on 1 html page, adds hyperlinks to jump to the different streams on the page. Saves to .html page only. - stream_config is either "production" or "wg" """ with open( @@ -109,8 +109,15 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): fname_helper.get_stream_from_bw_path(file): file for file in files } - f.write('<head></head>\n<body>\n<p>') - f.write('Jump to:\n<ul>') + f.write('<head></head>\n<body>\n') + f.write(""" + <p> + Rates, event sizes and bandwidths of all lines in each stream, listed descending in bandwidth. <br> + Exclusive retentions/rates are calculated by counting those events in which only that line fired. 
<br> + Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br> + </p> + """) + f.write('<p>Jump to:\n<ul>') for stream in files_by_stream.keys(): f.write( f'<li><a href="#{stream}_label"> {stream.upper()}</a></li>') @@ -121,7 +128,7 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): f.write(f'<a id="{stream}_label">') df = pd.read_csv(file, header=None) df.columns = _columns_per_line(process).keys() - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) f.write( df.style.format( '{:.3g}', subset=df.columns[df.columns != 'Line']). @@ -132,10 +139,10 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): return -def make_rate_table_row_per_stream(stream_config, fname_helper, process): +def make_rate_table_row_per_stream(stream_config: str, + fname_helper: FileNameHelper): """ Makes 1 table with rate/bw info integrated over the whole streamed mdf for all <stream_config> streams (i.e. a table with n_rows = n_streams). Saves to .html and .csv. - stream_config is either "production" or "wg" """ frames = [] @@ -147,7 +154,7 @@ def make_rate_table_row_per_stream(stream_config, fname_helper, process): df = pd.concat(frames) df.columns = COLUMNS_PER_STREAM - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) df.to_csv( fname_helper.final_rate_table_all_streams_path( stream_config, ext="csv")) @@ -173,17 +180,16 @@ if __name__ == "__main__": help='Compute for Hlt1, Hlt2 or Sprucing lines', choices=['hlt1', 'hlt2', 'spruce'], required=True) + parser.add_argument( + '--stream-config', + type=str, + choices=['streamless', 'production', 'wg'], + required=True) args = parser.parse_args() fname_helper = FileNameHelper(args.process) - stream_configs, main_stream_config = { - "hlt1": (["streamless"], "streamless"), - "hlt2": (["production", "wg"], "production"), - "spruce": (["wg"], "wg") - }[args.process] - - rates_all_lines(main_stream_config, fname_helper, args.process) - for stream_config in stream_configs: - make_rate_table_row_per_stream(stream_config, fname_helper, - args.process) - make_rate_table_row_per_line(stream_config, fname_helper, args.process) + + rates_all_lines(args.stream_config, fname_helper, args.process) + make_rate_table_row_per_stream(args.stream_config, fname_helper) + make_rate_table_row_per_line(args.stream_config, fname_helper, + args.process) diff --git a/python/MooreTests/line-and-stream-rates.py b/python/MooreTests/line-and-stream-rates.py index d8b754cb..fd1f9cce 100644 --- a/python/MooreTests/line-and-stream-rates.py +++ b/python/MooreTests/line-and-stream-rates.py @@ -13,45 +13,66 @@ import GaudiPython as GP from GaudiConf.reading import do_unpacking from Configurables import (ApplicationMgr, LHCbApp, IODataManager, EventSelector, LHCb__UnpackRawEvent, - HltDecReportsDecoder) + HltDecReportsDecoder, createODIN) from GaudiConf import IOHelper from PyConf.application import configured_ann_svc -import operator -from collections import Counter import json import argparse import csv -from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml +from pprint import pprint +from dataclasses import dataclass, field +from typing import List, Dict +from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS ''' + Run over MDF or DST file to compute: - When running production-stream config, returns: - - Per line (in form of single HTML table): + Per line (in form of single .csv table): 1. 
Inclusive retention 2. Inclusive rate 3. Exclusive retention 4. Exclusive rate - 5. Average DstData bank size - 6. DstData bandwidth - 7. Average event size (all banks in particular stream) - 8. Bandwidth - - Per stream in Turbo/Full/Turcal - 1. Inclusive retention - 2. Inclusive rate - 3. Average DstData bank size - 4. DstData bandwidth 5. Average event size (all banks in particular stream) 6. Bandwidth + 7. Average DstData bank size + 8. DstData bandwidth - When running wg-stream config, returns same figures as above (both per line and per stream) - When running streamless-stream config, returns just the per-line information. - + Per stream (i.e. the whole file) + 1. Inclusive retention + 2. Inclusive rate + 3. Average event size (all banks in particular stream) + 4. Bandwidth + 5. Average DstData bank size + 6. DstData bandwidth ''' LHCb = GP.gbl.LHCb RAW_BANK_TYPES = [(i, LHCb.RawBank.typeName(i)) for i in range(LHCb.RawBank.LastType)] +B_to_kB = 1e-3 +MBps_to_GBps = 1e-3 + + +@dataclass +class LineBW: + triggered: List[int] = field( + default_factory=lambda: []) # list of ~event numbers + raw: float = 0 # Stores whole event size for every event that fires this line (inclusive) + dst: float = 0 # Just DstData raw bank size, as above + + +@dataclass +class FileBW: + triggered: Dict[int, int] = field( + default_factory=lambda: dict()) # {event_no: n_times_triggered} + raw: float = 0 + dst: float = 0 + + +@dataclass +class WGBW: + n_triggered: int = 0 + raw: float = 0 + dst: float = 0 def rawbank_sizes(rawevent, lst): @@ -68,52 +89,33 @@ def rawbank_sizes(rawevent, lst): return [(name, size(i)) for i, name in lst] -def processing_events_per_line_and_stream(evt_max, lines, process, stream): +def rate_info_from_file(evt_max, lines, process, stream): + '''Loop through the file to work out per-line and per-stream triggers and + event sizes. Outputs used by functions below to compute rates and bandwidths. 
''' - Returns, per line: - i) How many events triggered on - ii) How many unique events triggered on - iii) Average DstData size of all events - iv) Average size of all events - - Returns, per stream: - i) How many events triggered on - ii) Average DstData size of all events - iii) Average size of all events - ''' - # Per file (stream) information - events_file = 0 - raw_size_all = 0 - dst_size_all = 0 - - # Per line information - # Stores how many events each line fired on - event_stats = { - line: [] - for line in [line + 'Decision' for line in list(lines)] - } - - # Stores whole event size size - raw = {line: 0 for line in [line + 'Decision' for line in list(lines)]} + decisions = [line + 'Decision' for line in list(lines)] - # Stores DstData bank size - dst = {line: 0 for line in [line + 'Decision' for line in list(lines)]} + totals = FileBW() # Per file (stream) information + line_totals = {dec: LineBW() for dec in decisions} # per line - exclusive = {} + # This block only used for hlt2 + event_info = {wg: set() for wg in KNOWN_WORKING_GROUPS} + event_info['Other'] = set() + event_info['TotalInclusive'] = set() + unknown_lines = set() # Loop over all events analysed = 0 while analysed < evt_max: analysed += 1 - exclusive.update({analysed: 0}) - # Run an event appMgr.run(1) report = evt[f'/Event/{process.capitalize()}/DecReports'] # Sprucing has stream-dependent raw event locations - different to HLT1/2 rawevent = evt[ f'/Event/{stream if process == "spruce" else "DAQ"}/RawEvent'] + header = evt["/Event/myODIN"] evtsize = sum( bank[1] for bank in rawbank_sizes(rawevent, RAW_BANK_TYPES)) @@ -122,103 +124,135 @@ def processing_events_per_line_and_stream(evt_max, lines, process, stream): # Will quit running if there are no more events in the input file if report: - # Count per file/stream - events_file += 1 - raw_size_all += evtsize - dst_size_all += dstsize - for line in event_stats.keys(): - # Count per line - if report.decReport(line): - if report.decReport(line).decision() == 1: - event_stats[line].append(analysed) - exclusive[analysed] += 1 - raw[line] += evtsize - dst[line] += dstsize + # Info per file/stream + totals.triggered.update({analysed: 0}) + totals.raw += evtsize + totals.dst += dstsize + for dec in line_totals.keys(): + # Info per line + if report.decReport(dec) and report.decReport( + dec).decision() == 1: + totals.triggered[analysed] += 1 + line_totals[dec].triggered.append(analysed) + line_totals[dec].raw += evtsize + line_totals[dec].dst += dstsize + + # Info per WG within the stream + if process == "hlt2": + info = (header.eventNumber(), evtsize, dstsize) + event_info['TotalInclusive'].add(info) + wg = guess_wg(dec, process) + event_info[wg].add(info) + if wg == "Other": + unknown_lines.add(dec) else: break - # First three variables per stream/file, last four for lines - return events_file, raw_size_all, dst_size_all, event_stats, exclusive, raw, dst - - -def rates_per_line(event_stats, exclusive, raw, dst, input_rate, - output_file_path): - - data = [] - - # Compute exclusive rate - sort = dict( - sorted( - {k: v - for (k, v) in exclusive.items() if v > 0}.items(), - key=operator.itemgetter(1), - reverse=True)) - - unique_events = [key for key, value in sort.items() if value == 1] - - for line, val in event_stats.items(): - events_all = val + unique_events - num_events = len(event_stats[line]) - row_values = ( - line, - num_events / LHCbApp().EvtMax * 100 - if num_events else 0, # Inclusive Retention (expressed as %) - num_events / LHCbApp().EvtMax * input_rate - if 
num_events else 0,  # Inclusive Rate (in kHz)
-            len([
-                key for key, value in Counter(events_all).items() if value > 1
-            ]) / LHCbApp().EvtMax * 100
-            if num_events else 0,  # Exclusive retention (expressed as %)
-            len([
-                key for key, value in Counter(events_all).items() if value > 1
-            ]) / LHCbApp().EvtMax * input_rate
-            if num_events else 0,  # Exclusive rate (in kHz)
-            raw[line] / num_events * 1e-3
-            if num_events else 0,  # Average event size (in kB)
-            (num_events / LHCbApp().EvtMax * raw[line] / num_events) *
-            input_rate / 1e6 if num_events else 0,  # Event bandwidth (in GB/s)
-            dst[line] / len(event_stats[line]) * 1e-3
-            if num_events else 0,  # Average DstData size (in kB)
-            (num_events / LHCbApp().EvtMax * dst[line] / num_events) *
-            input_rate / 1e6 if num_events else 0
-        )  # DstData Bandwidth (in GB/s)
-
-        data.append(row_values)
-
-    with open(output_file_path, 'w') as f:
+
+    if len(unknown_lines):
+        print(f"Found {len(unknown_lines)} unknown lines. They are...")
+        pprint(unknown_lines)
+
+    # Remove trigger-less WGs, and transform out of tuple for easier & more-robust lookups later
+    wg_event_numbers = {}
+    wg_bws = {}
+    if process == "hlt2":
+        for wg, event_info_set in event_info.items():
+            event_numbers = [info[0] for info in event_info_set]
+            if len(event_numbers) == 0: continue
+
+            wg_event_numbers[wg] = event_numbers
+            wg_bws[wg] = WGBW(
+                n_triggered=len(event_numbers),
+                raw=sum([info[1] for info in event_info_set]),
+                dst=sum([info[2] for info in event_info_set]))
+
+    return totals, line_totals, wg_bws, wg_event_numbers
+
+
+def _write_to_csv(output_path, rows):
+    with open(output_path, 'w') as f:
         csv_out = csv.writer(f)
-        for tup in data:
-            csv_out.writerow(tup)
+        for row in rows:
+            csv_out.writerow(row)
+
+
+def rates_per_line(line_totals: dict[str, LineBW], file_totals: FileBW,
+                   input_rate: float, output_file_path: str) -> None:
+
+    rows = []
+    for line, bw_info in line_totals.items():
+
+        n_events = LHCbApp().EvtMax
+        n_fired = len(bw_info.triggered)
+        retention = n_fired / n_events
+        rate = retention * input_rate  # kHz, inclusive
+
+        n_fired_excl = len([
+            evt for evt in bw_info.triggered if file_totals.triggered[evt] == 1
+        ])
+        excl_retention = n_fired_excl / n_events
+        excl_rate = excl_retention * input_rate  # kHz
+
+        avg_evt_size = bw_info.raw * B_to_kB / n_fired if n_fired else 0  # kB
+        avg_dst_size = bw_info.dst * B_to_kB / n_fired if n_fired else 0  # kB
+        bw = rate * avg_evt_size * MBps_to_GBps  # GB/s
+        dst_bw = rate * avg_dst_size * MBps_to_GBps  # GB/s
+
+        rows.append([
+            line, retention * 100, rate, excl_retention * 100, excl_rate,
+            avg_evt_size, bw, avg_dst_size, dst_bw
+        ])
+
+    _write_to_csv(output_file_path, rows)
     return
 
 
-def rates_per_stream(events, raw_size, dst_size, streamname, input_rate,
-                     output_file_path):
+def rates_per_stream(file_totals: FileBW, stream: str, input_rate: float,
+                     output_file_path: str) -> None:
 
-    data = []
+    n_events = LHCbApp().EvtMax
+    n_fired = len(file_totals.triggered)
+    retention = n_fired / n_events
+    rate = retention * input_rate
 
-    row_values = (
-        streamname,
-        events / LHCbApp().EvtMax * 100
-        if events else 0,  # Inclusive Retention (expressed as %)
-        events / LHCbApp().EvtMax * input_rate
-        if events else 0,  # Inclusive Rate (in kHz)
-        raw_size / events * 1e-3
-        if events else 0,  # Average event size (in kB)
-        (events / LHCbApp().EvtMax * raw_size / events) * input_rate / 1e6
-        if events else 0,  # Event bandwidth (in GB/s)
-        dst_size / events * 1e-3
-        if events else 0,  # Average DstData size (in kB)
-        (events / LHCbApp().EvtMax * dst_size / events) * input_rate / 1e6
-        if events else 0)  # DstData Bandwidth (in GB/s)
+    avg_evt_size = file_totals.raw * B_to_kB / n_fired if n_fired else 0
+    avg_dst_size = file_totals.dst * B_to_kB / n_fired if n_fired else 0
 
-    data.append(row_values)
+    bw = rate * avg_evt_size * MBps_to_GBps  # GB/s
+    dst_bw = rate * avg_dst_size * MBps_to_GBps  # GB/s
 
-    with open(output_file_path, 'w') as f:
-        csv_out = csv.writer(f)
-        for tup in data:
-            csv_out.writerow(tup)
+    row = (stream, retention * 100, rate, avg_evt_size, bw, avg_dst_size,
+           dst_bw)
+    _write_to_csv(output_file_path, [row])
+    return
+
+
+def rates_per_wg_intra_stream(wg_bws: dict[str, WGBW], input_rate: float,
+                              output_file_path: str) -> None:
+    # Get inclusive rates/bandwidths of each WG within this stream
+
+    n_events = LHCbApp().EvtMax
+    wg_bw_infos = dict()
+    for wg, bw_info in wg_bws.items():
+        rate = bw_info.n_triggered * input_rate / n_events  # kHz
+        wg_bw_infos[wg] = [
+            rate,
+            input_rate * (bw_info.raw * B_to_kB) * MBps_to_GBps /
+            n_events,  # bandwidth, GB/s
+            input_rate * (bw_info.dst * B_to_kB) * MBps_to_GBps /
+            n_events,  # dst bandwidth, GB/s
+        ]
+
+    n_metrics = len(wg_bw_infos['TotalInclusive'])
+    wg_bw_infos['SumWGs'] = [
+        sum(bw_info[i] for wg, bw_info in wg_bw_infos.items()
+            if wg != "TotalInclusive") for i in range(n_metrics)
+    ]
+    rows = [[wg] + bw_info for wg, bw_info in wg_bw_infos.items()]
+
+    _write_to_csv(output_file_path, rows)
     return
 
 
@@ -307,6 +341,11 @@ if __name__ == '__main__':
     # TODO do_unpacking will try to unpack HLT2 dec banks if spruce
     # leads to annoying ERROR messages that do nothing.
     algs = do_unpacking(**raw_event_unpacker_kwargs)
+    algs += [
+        LHCb__UnpackRawEvent(
+            BankTypes=['ODIN'], RawBankLocations=["DAQ/RawBanks/ODIN"]),
+        createODIN(ODIN="myODIN"),
+    ]
 
     appMgr = ApplicationMgr(TopAlg=algs)
     appMgr.ExtSvc += [
@@ -314,18 +353,31 @@ if __name__ == '__main__':
     ]
     appMgr = GP.AppMgr()
     evt = appMgr.evtsvc()
-    i_rate = int(input_config['input_rate'])
+    input_rate = int(input_config['input_rate'])
 
-    evts_all, rawbanks_all, dst_all, event_stats, exclusive, raw, dst = processing_events_per_line_and_stream(
+    file_totals, line_totals, wg_bws_for_hlt2, wg_event_numbers_for_hlt2 = rate_info_from_file(
         LHCbApp().EvtMax, lines, args.process, args.stream)
 
     # Calculate key quantities per stream
     rates_per_stream(
-        evts_all, rawbanks_all, dst_all, args.stream, i_rate,
+        file_totals, args.stream, input_rate,
         fname_helper.tmp_rate_table_per_stream_path(args.stream_config,
                                                     args.stream))
 
     # Calculate key quantities per line
     rates_per_line(
-        event_stats, exclusive, raw, dst, i_rate,
+        line_totals, file_totals, input_rate,
         fname_helper.tmp_rate_table_per_line_path(args.stream_config,
                                                   args.stream))
+
+    if args.process == "hlt2":
+        # Save the event numbers for WG-by-WG overlaps
+        event_number_file = fname_helper.intra_stream_event_no_fname(
+            args.stream_config, args.stream)
+        with open(event_number_file, 'w') as f:
+            json.dump(wg_event_numbers_for_hlt2, f)
+
+        # Calculate rates/bws within a stream
+        rates_per_wg_intra_stream(
+            wg_bws_for_hlt2, input_rate,
+            fname_helper.tmp_rate_table_intra_stream_path(
+                args.stream_config, args.stream))
diff --git a/python/MooreTests/make_bandwidth_test_page.py b/python/MooreTests/make_bandwidth_test_page.py
index cc79f147..52cd827d 100644
--- a/python/MooreTests/make_bandwidth_test_page.py
+++ b/python/MooreTests/make_bandwidth_test_page.py
@@ -12,45 +12,15 @@ import argparse
 import jinja2
 import matplotlib.pyplot as plt
 import pandas as pd
-import yaml
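
For orientation, a minimal sketch (not part of the patch) of the per-line arithmetic that the new rates_per_line implements, with hypothetical numbers; the unit constants mirror B_to_kB and MBps_to_GBps defined in line-and-stream-rates.py:

    B_to_kB = 1e-3       # bytes -> kB
    MBps_to_GBps = 1e-3  # kHz * kB (= MB/s) -> GB/s

    n_events = 100_000   # plays the role of LHCbApp().EvtMax
    input_rate = 1000.0  # kHz, as read from the input-config YAML
    n_fired = 2500       # events on which a hypothetical line fired
    sum_raw = n_fired * 50_000  # summed raw-bank size of those events, bytes

    retention = n_fired / n_events              # 0.025
    rate = retention * input_rate               # 25 kHz
    avg_evt_size = sum_raw * B_to_kB / n_fired  # 50 kB
    bw = rate * avg_evt_size * MBps_to_GBps     # 1.25 GB/s
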
 import os
 from math import log10
 from dataclasses import dataclass, field
 from typing import List
 from collections import namedtuple
-from PRConfig.bandwidth_helpers import FileNameHelper
+from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS
 
 plt.ioff()
 
-TOP_LEVEL_HEADER = jinja2.Template("""
-<p>
-    slot.build_id: $$version$$<br>
-    start time: $$start_time$$<br>
-    end time: $$end_time$$<br>
-    platform: $$platform$$<br>
-    hostname: $$hostname$$<br>
-    cpu_info: $$cpu_info$$<br>
-    testing script path: {{SCRIPTPATH}}
-</p>
-<ul>
-    <li><a href="{{BASE_PATH}}/run.log">Logs</a></li>
-</ul>
-""")
-
-MEMORY_CONSUMPTION = jinja2.Template("""
-<object type="image/png" data="memory_consumption.png"></object>
-<p>
-    Memory consumption as functions of Wall-time. <br>
-    The virtual memory size is the total amount of memory the process may hypothetically access. <br>
-    The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
-    The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
-    As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
-    Swap memory is used when RAM is full. <br>
-    The maximum resident set size usage is $$max_rss$$ GB. <br>
-    The maximum proportional set size usage is $$max_pss$$ GB. <br>
-</p>
-""")
-
 SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
 <html>
 <head></head>
@@ -60,25 +30,38 @@ SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
     <b>{{EXIT_CODE_SENTENCE}}</b>
 </p>
 <p>
-    Results per working group and stream:
-    <ul>
-        <li>Inclusive retention and rate</li>
-        <li>(Jaccard) similarity matrix</li>
-        <li>(Conditional) overlap matrix</li>
-        <li>Average DstData size and bandwidth</li>
-        <li>Average event size and bandwidth</li>
-    </ul>
+    This page contains the results of the {{PROCESS}} bandwidth test. Main results:
 </p>
+{{main_rate_table}}
 <p>
-    Results per line: all of the above, plus
-    <ul>
-        <li>Exclusive retention and rate</li>
-        <li>Descriptives (whether persistreco and/or extra outputs is enabled)</li>
-    </ul>
+    The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
+    <a href="{{BASE_PATH}}/{{stream_config_json}}">here</a>. <br>
+    This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
+    "DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) is saved. <br>
+    The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
+    The total event size (and total bandwidth) counts all raw banks (incl. DstData, and detector raw banks if present) in the file. <br>
 </p>
+<p> Scroll down to see: </p>
+<ul>
+    <li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
+    <li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
+    <li> Information about the input sample, </li>
+    <li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics, </li>
+    <li> Memory consumption of the test as a function of time. </li>
+</ul>
+<p>
+    Further results can be found in the links below:
+</p>
+<ul>
+    {{LIST_OF_LINKS}}
+    $${{PROCESS}}__comparison$$
+    </b></b>
+</ul>
 <p>
     See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
+{{BAR_CHARTS}}
+{{LINES_PER_WG}}
 <p>
-    Input sample information:
+    <b>Input sample information:</b>
     <ul>
         <li>Config file: {{INPUT_CONFIG_PATH}}</li>
         <li>Input rate: {{INPUT_RATE}} kHz</li>
@@ -86,46 +69,18 @@ SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
         <li>Radius of VELO opening: {{INPUT_VELO_RADIUS}} mm</li>
     </ul>
 </p>
-{{TEMPLATE}}
 <p>
-    Other results are shown by plots or tables (in the links) below. <br>
+    <b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
+    The total distributions are shown as a stacked histogram, split into several histograms of WGs. <br>
+    The distributions per WG are attached in the html page linked above. <br>
+    Total event size is calculated from summing all raw banks in the file (including DstData). <br>
+    Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank. <br>
 </p>
-{{LINES_PER_WG}}
+<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
 <object type="image/png" data="{{PROCESS}}__hist__rate.png"></object>
-<p>
-    Distribution of rate of selection lines. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    A line is considered to be "problematic" if it has a rate of 0 Hz
-    or larger than 1 kHz, which requires some attention. <br>
-    The rates of all lines are listed in a html page attached below. <br>
-</p>
-
-{{DST_DATA_HIST}}
-
 <object type="image/png" data="{{PROCESS}}__hist__total_size.png"></object>
-<p>
-    Distribution of total event size of selection lines. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    A line is considered to be "problematic" if its DstData size or total event size
-    is larger than 1 MB, which requires some attention. <br>
-    The event sizes of all lines are listed in a html page attached below. <br>
-</p>
-
-{{DST_BW_HIST}}
-
-<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
-<p>
-    Distribution of bandwidth computed from total event size. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    Currently, a line is considered to be "problematic" if its bandwidth from DstData size
-    is larger than 200 MB/s, which requires some attention. This is a temporary limit. <br>
-    The event sizes of all lines are listed in a html page attached below. <br>
-</p>
+{{DST_DATA_HIST}}
 {{MEMORY_CONSUMPTION}}
-{{ALL_RESULTS}}
 </body>
 </html>
 """)
@@ -140,168 +95,13 @@ HLT2_AND_SPRUCE_REPORT_TEMPLATE = jinja2.Template("""
     The appropriate process-specific webpages can be found below. 
</p> <ul> - <li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 index</a></li> - <li><a href="{{BASE_PATH}}/spruce__index.html">Spruce index</a></li> + <li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 results</a></li> + <li><a href="{{BASE_PATH}}/spruce__index.html">Sprucing results</a></li> </ul> {{MEMORY_CONSUMPTION}} </body> </html>""") -HLT1_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under a single streamless configuration. <br> - The definition of the configuration can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">Streamless configuration</a></li> -</ul> -<p> - The streamless configuration is representative of data taking. <br> - The rates, event sizes and bandwidth results from the streamless configuration is: <br> -</p> -<p> -</p> -{{table_streamless_rates}}""") - -HLT2_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under 2 streaming configurations: production streams (Full, Turbo etc.) and one stream per WG. <br> - The definition of the production streaming and working-group streaming can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_prod}}">Production-stream configuration</a></li> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li> -</ul> -<p> - The production stream configuration reflects the streaming we will have for data taking. <br> - The rates, event sizes and bandwidths results from production-stream configuration is: <br> -</p> -{{table_5stream_rates}}""") - -SPRUCE_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under 1 streaming configuration: one stream per WG. <br> - The definition of per-WG-stream configuration can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li> -</ul> -<p> - The wg-stream configuration is close to what we will have for data taking. 
<br> - The rates, event sizes and bandwidths results from wg-stream configuration is: <br> -</p> -{{table_wgstream_rates}}""") - -HLT1_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ -</ul> -""") - -HLT2_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__other_lines.html">Show list of lines in "Other" category</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__plots_per_wg.html">Show plots split by WGs</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__similarity_matrices.html"> Show similarity Jaccards and overlap matrices between streams for different stream configurations</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__rates_streaming.html"> Show rates of streams under different configurations</a></li> - <li><a href="{{BASE_PATH}}/{{line_descr}}"> PersistReco and ExtraOutput for selection lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_wg_stream}}"> Split by working group: rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_prod_stream}}"> Split by production stream: rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ - </b></b> -</ul> -""") - -SPRUCING_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__other_lines.html">Show list of lines in "Other" category</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__plots_per_wg.html">Show plots split by WGs</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__similarity_matrices.html"> Show similarity Jaccards and overlap matrices between streams for different stream configurations</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__rates_streaming.html"> Show rates of streams under different configurations</a></li> - <li><a href="{{BASE_PATH}}/{{line_descr}}"> PersistReco and ExtraOutput for selection lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_wg_stream}}"> Split by working group: rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ - </b></b> -</ul> -""") - -HLT1_LINES_PER_WG = jinja2.Template("""""") -HLT1_DST_DATA_HIST = jinja2.Template("""""") -HLT1_DST_BW_HIST = jinja2.Template("""""") - -HLT2_OR_SPRUCING_LINES_PER_WG = jinja2.Template(""" -<object type="image/png" data="{{PROCESS}}__lines_per_wg.png"></object> -<p> - The number of selection lines per working group. <br> - "Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br> - To make lines properly categorized, one should follow the naming convention, - name of lines should start with `Hlt1/Hlt2/Spruce[WG]_`. -</p> -""") - -HLT2_OR_SPRUCING_DST_DATA_HIST = jinja2.Template(""" -<object type="image/png" data="{{PROCESS}}__hist__dst_data_size.png"></object> -<p> - Distribution of DstData RawBank size of selection lines. <br> - The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br> - The distributions per WG is attached in the html page below. 
-</p>
-""")
-
-HLT2_OR_SPRUCING_DST_BW_HIST = jinja2.Template("""
-<object type="image/png" data="{{PROCESS}}__hist__dst_bandwidth.png"></object>
-<p>
-    Distribution of bandwidth computed from DstData RawBank size. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below.
-</p>
-""")
-
-TABLE_OTHER_LINE_TEMPLATE = jinja2.Template("""
-<p>
-    List of line names that categorized to "Others".
-</p>
-{{table_other_lines}}
-""")
-
-PLOTS_PER_WG_TEMPLATE = jinja2.Template("""
-<p>
-    Plots of rates, event sizes and bandwidths for lines, split into different WGs.
-</p>
-{{plots_per_wg}}
-""")
-
-ALL_RATE_TEMPLATE = jinja2.Template("""
-<p>
-    Rates, event sizes and bandwidths of all lines, listed descending in retention rates. <br>
-    The results are obtained by a per-event analysing under 5-stream configuration. <br>
-    These numbers are also saved in a csv file: <a href="{{BASE_PATH}}/{{CSV_PATH}}">{{CSV_PATH}}</a>
-</p>
-""")
-
-known_working_groups = [
-    "B2CC",
-    "B2OC",
-    "BandQ",
-    "BnoC",
-    "Calib",
-    "Calo",
-    "Charm",
-    "DPA",
-    "HLT",
-    "IFT",
-    "Luminosity",
-    "PID",
-    "QCD",
-    "QEE",
-    "RD",
-    "RTA",
-    "Simulation",
-    "SL",
-    "Tagging",
-    "Tracking",
-]
-
 
 @dataclass
 class WGRateBWInfo:
@@ -355,14 +155,13 @@ def histo_maker(entry_list,
 def make_plots_per_wg(fname_helper, wg_name, wg_bw_info, process):
     '''
-    Make plots of rates and event sizes for each WG.
+    Make plots of rates/bandwidths and event sizes for each WG.
 
     Arguments:
+        fname_helper: instance of FileNameHelper
         wg_name: name of the working group
-        rate_list: list containing rates of all lines from the WG
-        dst_size_list: list containing DstData Rawbank size of all lines from the WG
-        tot_size_list: list containing total event size of all lines from the WG
-        process: either `hlt2` or `spruce`
+        wg_bw_info: WGRateBWInfo object for this WG
+        process: `hlt1`, `hlt2` or `spruce`
     '''
 
     title = f"{wg_name} {process.capitalize()}"
@@ -381,52 +180,47 @@ def make_plots_per_wg(fname_helper, wg_name, wg_bw_info, process):
             f"hist__{plot_bit}__{wg_name}.png"))
 
 
-def make_plots(all_lines_bw_info,
-               tot_rate,
-               tot_bandwidth,
-               process,
-               wgs=known_working_groups):
+def make_plots(all_lines_bw_info, tot_rate, tot_bandwidth, fname_helper,
+               process):
     '''
-    Make plots of rate and event sizes of all lines.
-    It will create three stacked histograms containing distributions of all lines,
-    and a pie chart showing the number of lines per WG.
+    Make plots of rate, bandwidth and event sizes of all lines.
+    It will create 5 stacked histograms containing distributions of all lines
+    grouped by WG, and a pie chart showing the number of lines per WG.
 
     Arguments:
-        rate_dict: dictionary of line names and their rates
-        tot_rate: total rate of all lines
-        evt_size_dict: dictionary of line names and their event sizes
-        process: either `hlt2` or `spruce`
-        wgs: list of working groups to categorize
+        all_lines_bw_info: dict(line_name: LineRateBWInfo object)
+        tot_rate: total rate of all lines (arithmetic sum of stream rates)
+        tot_bandwidth: total bandwidth of all lines (arithmetic sum of stream BWs)
+        fname_helper: instance of FileNameHelper
+        process: `hlt1`, `hlt2` or `spruce`
+
+    Returns:
+        - list of found WGs with >= 1 line
+        - list of lines that didn't fit into 1 WG
     '''
 
     # Count number of lines and rates/evt sizes per WG
-    rate_info_per_wg = {wg: WGRateBWInfo() for wg in wgs + ["Other"]}
+    rate_info_per_wg = {
+        wg: WGRateBWInfo()
+        for wg in KNOWN_WORKING_GROUPS + ["Other"]
+    }
     list_other_lines = []
     for line, bw_info in all_lines_bw_info.items():
-        found_wg = False
-        # Expect e.g {Hlt1,Hlt2,Spruce}<WG>_<rest-of-line-name>
-        wg_guess = line.split("_")[0].removeprefix(process.capitalize())
-        for wg in rate_info_per_wg.keys():
-            if wg_guess.startswith(wg):
-                found_wg = True
-                rate_info_per_wg[wg].nlines += 1
-                for attrib in [
-                        "rate", "dst_size", "tot_size", "dst_bw", "tot_bw"
-                ]:
-                    getattr(rate_info_per_wg[wg], attrib).append(
-                        getattr(bw_info, attrib))
-        if not found_wg:
+        wg_guess = guess_wg(line, process)
+        rate_info_per_wg[wg_guess].nlines += 1
+        if wg_guess == "Other":
             list_other_lines.append(line)
-            rate_info_per_wg["Other"].nlines += 1
-            for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
-                getattr(rate_info_per_wg["Other"], attrib).append(
-                    getattr(bw_info, attrib))
+
+        for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
+            getattr(rate_info_per_wg[wg_guess], attrib).append(
+                getattr(bw_info, attrib))
+
     rate_info_per_wg = {
        k: info
        for k, info in rate_info_per_wg.items() if info.nlines != 0
    }
 
-    # Make a pie plot of lines per WG
+    # Make a pie chart of lines per WG
     labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
     fig = plt.figure()
     plt.pie([v.nlines for v in rate_info_per_wg.values()],
@@ -434,10 +228,13 @@
             labels=labels,
             wedgeprops=dict(width=0.4, edgecolor="w"))
     plt.title(f"Number of {process.capitalize()} lines per WG")
-    plt.savefig(f"tmp/Output/{args.process}__lines_per_wg.png", format="png")
+    plt.savefig(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "lines_per_wg.png"),
+        format="png")
     plt.close(fig)
 
-    ### Make hist plots
+    # Stacked histograms
     title = f"{process.capitalize()}"
     for attrib, xtitle, title, plot_bit, take_log, log_th, range in zip(
         ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"], [
@@ -473,39 +270,294 @@
     return rate_info_per_wg.keys(), list_other_lines
 
 
-def make_other_line_table(name_list):
-    table_html_str = r'''<table border = "1">
-    <tr>
-        <th> Name </th>
-    </tr>'''
-    for name in name_list:
-        table_html_str += '''
-    <tr>
-        <td> %s </td>
-    </tr>''' % name
-    table_html_str += '\n</table>'
-    return table_html_str
-
-
-def make_plots_per_wg_list(wg_list, process):
-    list_html_str = ''
-    for wg_name in wg_list:
-        list_html_str += f'''
+def make_bar_charts(rates_df, column, stream, plot_path):
+    """Bar charts of the WG-by-WG rates within 1 stream"""
+
+    fig = plt.figure()
+    plt.grid(True, axis='y', zorder=0, linestyle='dashed')
+    bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
+    plt.bar_label(bars, fmt='%.1f')
+    plt.ylabel(column)
+    plt.xticks(rates_df['WG'], 
rates_df['WG'], rotation='vertical') + plt.subplots_adjust(bottom=0.25) + plt.title(f'{column} for each WG in the {stream.capitalize()} stream') + plt.savefig(plot_path, format="png") + plt.close(fig) + + +def write_html_page(page_path, rendered_html): + if rendered_html: + with open(page_path, "w") as html_file: + html_file.write(rendered_html) + + +def _render(html_str): + return jinja2.Template(html_str).render() + + +def render_all_lines_page(fname_helper, building_locally): + csv_path = fname_helper.final_rate_table_all_lines_path( + "csv", full_path=False) + html_str = f""" + <p> + Rates, event sizes and bandwidths of all lines, listed descending in bandwidth. <br> + Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br> + Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br> + These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a> + </p> + """ + with open(fname_helper.final_rate_table_all_lines_path("html"), + "r") as rate_table: + html_str += rate_table.read() + return _render(html_str) + + +def render_top_level_header(script_path, base_path): + return _render(f""" + <p> + slot.build_id: $$version$$<br> + start time: $$start_time$$<br> + end time: $$end_time$$<br> + platform: $$platform$$<br> + hostname: $$hostname$$<br> + cpu_info: $$cpu_info$$<br> + testing script path: {script_path} + </p> + <ul> + <li><a href="{base_path}/run.log">Logs</a></li> + </ul> + """) + + +def render_memory_consumption(): + return _render(""" <p> - Plots of {wg_name} group: + <b> Memory consumption of this test: </b> </p> - <object type="image/png" data="{process}__hist__rate__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__dst_data_size__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__total_size__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__dst_bandwidth__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__tot_bandwidth__{wg_name}.png"></object> + <object type="image/png" data="memory_consumption.png"></object> + <p> + Memory consumption as functions of Wall-time. <br> + The virtual memory size is the total amount of memory the process may hypothetically access. <br> + The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br> + The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br> + As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br> + Swap memory is used when RAM is full. <br> + The maximum resident set size usage is $$max_rss$$ GB. <br> + The maximum proportional set size usage is $$max_pss$$ GB. <br> + </p> + """) + + +def render_other_line_table(process, lines): + if process == "hlt1": + return _render("") + + html_str = """ + <p> + List of line names that categorized to "Others". 
+    </p>
+    """
+    html_str += r'''<table border = "1">
+    <tr>
+        <th> Name </th>
+    </tr>'''
+    for line in lines:
+        html_str += f'''
+    <tr>
+        <td> {line} </td>
+    </tr>'''
+    html_str += '\n</table>'
+    return _render(html_str)
+
+
+def render_plots_per_wg_page(process, wgs):
+    if process == "hlt1":
+        return _render("")
+
+    html_str = """
+    <p>
+        Plots of rates, event sizes and bandwidths for lines, split into different WGs.
+    </p>
+    """
+    for wg in wgs:
+        html_str += f'''
+    <p>
+        Plots of {wg} group:
+    </p>
+    <object type="image/png" data="{process}__hist__rate__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__dst_data_size__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__total_size__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__dst_bandwidth__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__tot_bandwidth__{wg}.png"></object>
     '''
-    return list_html_str
+    return _render(html_str)
 
+def render_dst_data_hists(process):
+    if process == "hlt1":
+        return _render("")
 
-def parse_yaml(file_path):
-    with open(os.path.expandvars(file_path), 'r') as f:
-        return yaml.safe_load(f)
+    html_str = ''
+    for hist_suffix in ("data_size", "bandwidth"):
+        html_str += f"""
+    <object type="image/png" data="{process}__hist__dst_{hist_suffix}.png"></object>
+    """
+    return _render(html_str)
+
+
+def render_lines_pie_chart(process):
+    if process == "hlt1":
+        return _render("")
+    return _render(f"""
+    <p>
+        <b>The number of selection lines per working group:</b> <br>
+    </p>
+    <object type="image/png" data="{process}__lines_per_wg.png"></object>
+    <p>
+        The "Other" category contains those lines whose parsed name doesn't belong to any known WG. <br>
+        To have lines categorized properly, one should follow the naming convention:
+        line names should start with `Hlt2/Spruce[WG]_`.
+    </p>
+    """)
+
+
+def render_bar_charts(process,
+                      stream_config,
+                      streams,
+                      metrics=('bandwidth', 'rate')):
+    if process != "hlt2":
+        return _render("")
+
+    html_str = ''
+    for metric in metrics:
+        html_str += f'''
+    <p>
+        <b>{metric.capitalize()} within each stream:</b>
+    </p>
+    <p>
+        "TotalInclusive" is the physical rate/bandwidth of the stream. "SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive".<br>
+        The difference between the two bars gives us information about the degree of WG-by-WG overlap.
+    </p>
+    '''
+        if stream_config == "production":
+            html_str += f'''
+    <p>
+        <b>Note:</b> The WG bars in the HLT2 Turbo stream correspond almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
+        Furthermore, this means <b>the "SumWGs" {metric} bar of HLT2 turbo is equal to the total physical {metric} of Turbo post-sprucing.</b><br>
+    </p>
+    '''
+        for stream in streams:
+            html_str += f'''
+    <object type="image/png" data="{process}__{metric}_bar_chart__{stream_config}__{stream}.png"></object>
+    '''
+    html_str += '''
+    <p>
+        Rates for a WG within a stream are calculated by counting the number of events saved to that stream in which at least 1 of that WG's lines fired.<br>
+        Bandwidths for a WG are calculated by summing the event size of all events saved to the stream in which at least 1 of that WG's lines fired.<br>
+    </p>
+    '''
+    return _render(html_str)
+
+
+def render_extra_sim_matrices(process, stream_config, streams):
+    if process != "hlt2":
+        return _render("")
+
+    html_str = """
+    <p>
+        The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
+        It shows how many of the events in stream A are covered by another stream B. <br>
+        The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
+        i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
+    </p>
+    <p>
+        The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
+        It shows how similar the two streams are and is useful in bandwidth division. <br>
+    </p>
+    """
+    for stream in streams:
+        html_str += f"""
+    <p>
+        The overlap matrix of the {stream.capitalize()} stream is:
+    </p>
+    """
+        with open(
+                fname_helper.intra_stream_overlap_matrix_path(
+                    stream_config, stream), "r") as overlap:
+            html_str += overlap.read()
+        html_str += f"""
+    <p>
+        The Jaccard similarity matrix of the {stream.capitalize()} stream is:
+    </p>
+    """
+        with open(
+                fname_helper.intra_stream_jaccard_similarities_path(
+                    stream_config, stream), "r") as jaccard:
+            html_str += jaccard.read()
+    return _render(html_str)
+
+
+def list_of_links_html(process, fname_helper, stream_config, building_locally):
+    base_path = fname_helper.base_html_path(building_locally)
+    links = [
+        f"""<li><a href="{base_path}/{process}__all_rates.html"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
+    ]
+    if process != "hlt1":
+        rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
+            stream_config, full_path=False)
+        links.append(
+            f"""<li><a href="{base_path}/{rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>"""
+        )
+        links.append(
+            f"""<li><a href="{base_path}/{process}__similarity_matrices.html"> Jaccard similarity and overlap matrices between streams</a></li>"""
+        )
+
+    if process == "hlt2":
+        links += [
+            f"""<li><a href="{base_path}/{process}__extra_bar_charts.html">Bar charts as below for DstData bandwidth</a></li>""",
+            f"""<li><a href="{base_path}/{process}__extra_similarity_matrices.html">Similarity and overlap matrices between WGs within each stream</a></li>""",
+        ]
+
+    if process != "hlt1":
+        links += [
+            f"""<li><a href="{base_path}/{process}__other_lines.html">List of lines in "Other" category</a></li>""",
+            f"""<li><a href="{base_path}/{fname_helper.line_descr_path(full_path=False)}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>""",
+            f"""<li><a href="{base_path}/{process}__plots_per_wg.html"> Histograms of metrics for each WG</a></li>"""
+        ]
+
+    return "\n".join(links)
+
+
+def render_sim_matrices_page(process, fname_helper, stream_config):
+    if process == "hlt1":
+        return _render("")
+
+    html_str = f"""
+    <p>
+        The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
+        It shows how many of the events in stream A are covered by another stream B. <br>
+        The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
+        i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
+    </p>
+    <p>
+        The overlap matrix of the {stream_config} streams is:
+    </p>
+    """
+    with open(fname_helper.overlap_matrix_path(stream_config), "r") as overlap:
+        html_str += overlap.read()
+    html_str += f"""
+    <p>
+        The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
+        It shows how similar the two streams are and is useful in bandwidth division. 
<br> + </p> + <p> + The Jaccard similarity matrix of the {stream_config} streams is: + </p> + """ + with open(fname_helper.jaccard_similarities_path(stream_config), + "r") as jaccard: + html_str += jaccard.read() + return _render(html_str) def _write_message(message, @@ -542,6 +594,18 @@ if __name__ == '__main__': type=str, required=True, help='Path to yaml config file defining the input.') + parser.add_argument( + '--stream-config', + type=str, + required=True, + choices=['wg', 'production', 'streamless'], + ) + parser.add_argument( + '--streams', + type=str, + nargs='+', + required=True, + help='List of trigger streams.') parser.add_argument( '-s', '--script-path', @@ -564,7 +628,7 @@ if __name__ == '__main__': '--skip-top-level-information-for-process-dependent-testpage', action='store_true', help= - 'Flag to avoid memory-consumption and build information sections of {proces}__index page.' + 'Flag to avoid memory-consumption and build information sections of {process}__index page.' ) parser.add_argument( '--building-locally', @@ -608,15 +672,9 @@ if __name__ == '__main__': n_high_rate = len( [info for info in rate_bw_info_by_line.values() if info.rate > tol]) - main_stream_config = { - "hlt1": "streamless", - "hlt2": "production", - "spruce": "wg" - }[args.process] - prod_df = pd.read_csv( fname_helper.final_rate_table_all_streams_path( - main_stream_config, ext="csv")) + args.stream_config, ext="csv")) tot_rate = sum(prod_df['Rate (kHz)']) tot_bandwidth = sum(prod_df['Total Bandwidth (GB/s)']) @@ -625,87 +683,61 @@ if __name__ == '__main__': rate_bw_info_by_line, tot_rate=tot_rate, tot_bandwidth=tot_bandwidth, + fname_helper=fname_helper, process=args.process) - other_line_table = make_other_line_table(other_line_list) - plots_per_wg = make_plots_per_wg_list(wg_list, args.process) + # Bar charts within a stream - only relevant for HLT2 + if args.process == 'hlt2': + for stream in args.streams: + intra_stream_rates_df = pd.read_csv( + fname_helper.tmp_rate_table_intra_stream_path( + args.stream_config, stream), + header=None) + intra_stream_rates_df.columns = [ + 'WG', 'Rate (kHz)', 'Bandwidth (GB/s)', + 'DstData Bandwidth (GB/s)' + ] + for column_header in intra_stream_rates_df.columns[1:]: + fname = { + 'Rate (kHz)': "rate", + 'Bandwidth (GB/s)': "bandwidth", + 'DstData Bandwidth (GB/s)': "dstbandwidth" + }[column_header] + make_bar_charts( + intra_stream_rates_df, column_header, stream, + fname_helper.bar_chart_path(args.stream_config, stream, + fname)) with open( - fname_helper.final_rate_table_all_streams_path(main_stream_config), + fname_helper.final_rate_table_all_streams_path(args.stream_config), "r") as rate_html: table_main_stream_rates = rate_html.read() - if args.process == 'hlt2': - template = HLT2_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_prod=fname_helper.stream_config_json_path( - "production", full_path=False), - stream_config_json_wg=fname_helper.stream_config_json_path( - "wg", full_path=False), - table_5stream_rates=table_main_stream_rates) - all_results = HLT2_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - line_descr=fname_helper.line_descr_path(full_path=False), - rate_table_split_by_prod_stream=fname_helper. - final_rate_table_all_lines_split_by_stream_path( - "production", full_path=False), - rate_table_split_by_wg_stream=fname_helper. 
- final_rate_table_all_lines_split_by_stream_path( - "wg", full_path=False), - PROCESS=args.process) - lines_per_wg = HLT2_OR_SPRUCING_LINES_PER_WG.render( - PROCESS=args.process) - dst_data_hist = HLT2_OR_SPRUCING_DST_DATA_HIST.render( - PROCESS=args.process) - dst_bw_hist = HLT2_OR_SPRUCING_DST_BW_HIST.render(PROCESS=args.process) - elif args.process == 'spruce': - template = SPRUCE_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_wg=fname_helper.stream_config_json_path( - "wg", full_path=False), - table_wgstream_rates=table_main_stream_rates) - all_results = SPRUCING_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - line_descr=fname_helper.line_descr_path(full_path=False), - rate_table_split_by_wg_stream=fname_helper. - final_rate_table_all_lines_split_by_stream_path( - "wg", full_path=False), - PROCESS=args.process) - lines_per_wg = HLT2_OR_SPRUCING_LINES_PER_WG.render( - PROCESS=args.process) - dst_data_hist = HLT2_OR_SPRUCING_DST_DATA_HIST.render( - PROCESS=args.process) - dst_bw_hist = HLT2_OR_SPRUCING_DST_BW_HIST.render(PROCESS=args.process) - - elif args.process == 'hlt1': - template = HLT1_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_wg=fname_helper.stream_config_json_path( - "streamless", full_path=False), - table_streamless_rates=table_main_stream_rates) - all_results = HLT1_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - PROCESS=args.process) - lines_per_wg = HLT1_LINES_PER_WG.render() - dst_data_hist = HLT1_DST_DATA_HIST.render() - dst_bw_hist = HLT1_DST_BW_HIST.render() + + base_path = fname_helper.base_html_path(args.building_locally) + if args.skip_top_level_information_for_process_dependent_testpage: + top_level_header = "" + memory_consumption = "" + else: + top_level_header = render_top_level_header(args.script_path, base_path) + memory_consumption = render_memory_consumption() with open( fname_helper.html_page_outputs_path(f"{args.process}__index.html"), "w") as html_file: html = SINGLE_PROCESS_REPORT_TEMPLATE.render( - TOP_LEVEL_HEADER="" - if args.skip_top_level_information_for_process_dependent_testpage - else TOP_LEVEL_HEADER.render( - SCRIPTPATH=args.script_path, - BASE_PATH=fname_helper.base_html_path(args.building_locally)), - MEMORY_CONSUMPTION="" - if args.skip_top_level_information_for_process_dependent_testpage - else MEMORY_CONSUMPTION.render(), - TEMPLATE=template, - ALL_RESULTS=all_results, - LINES_PER_WG=lines_per_wg, - DST_DATA_HIST=dst_data_hist, - DST_BW_HIST=dst_bw_hist, + TOP_LEVEL_HEADER=top_level_header, + MEMORY_CONSUMPTION=memory_consumption, + BASE_PATH=base_path, + stream_config_json=fname_helper.stream_config_json_path( + args.stream_config, full_path=False), + main_rate_table=table_main_stream_rates, + BAR_CHARTS=render_bar_charts(args.process, args.stream_config, + args.streams), + LIST_OF_LINKS=list_of_links_html(args.process, fname_helper, + args.stream_config, + args.building_locally), + LINES_PER_WG=render_lines_pie_chart(args.process), + DST_DATA_HIST=render_dst_data_hists(args.process), INPUT_CONFIG_PATH=os.path.expandvars(args.input_config), INPUT_RATE=input_info['input_rate'], INPUT_NU=input_info['nu'], @@ -715,100 +747,51 @@ if __name__ == '__main__': PROCESS=args.process) html_file.write(html) - with open( - fname_helper.process_dependent_html_page_outputs_path( - "other_lines.html"), "w") as html_file: - html = 
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "other_lines.html"), "w") as html_file:
-        html = TABLE_OTHER_LINE_TEMPLATE.render(
-            table_other_lines=other_line_table)
-        html_file.write(html)
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "plots_per_wg.html"), "w") as html_file:
-        html = PLOTS_PER_WG_TEMPLATE.render(plots_per_wg=plots_per_wg)
-        html_file.write(html)
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "all_rates.html"), "w") as html_file:
-        html = ALL_RATE_TEMPLATE.render(
-            BASE_PATH=fname_helper.base_html_path(args.building_locally),
-            CSV_PATH=fname_helper.final_rate_table_all_lines_path(
-                "csv", full_path=False))
-        html_file.write(html)
-        with open(fname_helper.final_rate_table_all_lines_path("html"),
-                  "r") as rate_table:
-            html_file.write(rate_table.read())
-
-    stream_configs = {
-        "hlt1": ["streamless"],
-        "hlt2": ["production", "wg"],
-        "spruce": ["wg"]
-    }[args.process]
-
-    if args.process != "hlt1":
-        with open(
-                fname_helper.process_dependent_html_page_outputs_path(
-                    "similarity_matrices.html"), "w") as html_file:
-            html_file.write("""
-            <p>
-            The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
-            It shows how much events in the stream A are covered by another stream B. <br>
-            The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
-            i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
-            </p>
-            """)
-            for stream_config in stream_configs:
-                html_file.write(f"""
-                <p>
-                The overlap matrix of the {stream_config} streams is:
-                </p>
-                """)
-                with open(
-                        fname_helper.overlap_matrix_path(stream_config),
-                        "r") as overlap:
-                    html_file.write(overlap.read())
-            html_file.write("""
-            <p>
-            The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
-            It shows how similar the two streams are and is useful in bandwidth division. <br>
-            </p>
-            """)
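The two definitions in the (removed) text above carry over verbatim to the new `render_sim_matrices_page`. A tiny worked example on toy event-number sets, purely illustrative:

```python
# Toy example of the two quantities defined above. Event numbers and
# stream roles are made up.
A = {1, 2, 3, 4}  # events kept by stream A
B = {3, 4, 5}     # events kept by stream B

jaccard = len(A & B) / len(A | B)   # |A n B| / |A u B| = 2/5 = 0.40
overlap_wrt_A = len(A & B) / len(A)  # |A n B| / |A| = 2/4 = 0.50
overlap_wrt_B = len(A & B) / len(B)  # |A n B| / |B| = 2/3 ~ 0.67

print(jaccard, overlap_wrt_A, overlap_wrt_B)
```

Note the asymmetry: the overlap matrix depends on which stream is the reference (columns vs. rows), while the Jaccard index is symmetric in A and B.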
-            for stream_config in stream_configs:
-                html_file.write(f"""
-                <p>
-                The Jaccard similarity matrix of the {stream_config} streams is:
-                </p>
-                """)
-                with open(
-                        fname_helper.jaccard_similarities_path(stream_config),
-                        "r") as jaccard:
-                    html_file.write(jaccard.read())
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "rates_streaming.html"), "w") as html_file:
-        for stream_config in stream_configs:
-            html_file.write(f"""
-            <p>
-            The rates, event sizes and bandwidths of the {stream_config} streams are:
-            </p>
-            """)
-            with open(
-                    fname_helper.final_rate_table_all_streams_path(
-                        stream_config), "r") as rate_html:
-                html_file.write(rate_html.read())
+    # Extra pages
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "other_lines.html"),
+        render_other_line_table(args.process, other_line_list))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "plots_per_wg.html"),
+        render_plots_per_wg_page(args.process, wg_list))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "all_rates.html"),
+        render_all_lines_page(fname_helper, args.building_locally))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "similarity_matrices.html"),
+        render_sim_matrices_page(args.process, fname_helper,
+                                 args.stream_config))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "extra_bar_charts.html"),
+        render_bar_charts(
+            args.process,
+            args.stream_config,
+            args.streams,
+            metrics=['dstbandwidth']))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "extra_similarity_matrices.html"),
+        render_extra_sim_matrices(args.process, args.stream_config,
+                                  args.streams))
 
     with open(fname_helper.html_page_outputs_path("index.html"),
               "w") as html_file:
         if args.multiple_processes:
             html = HLT2_AND_SPRUCE_REPORT_TEMPLATE.render(
-                TOP_LEVEL_HEADER=TOP_LEVEL_HEADER.render(
-                    SCRIPTPATH=args.script_path,
-                    BASE_PATH=fname_helper.base_html_path(
-                        args.building_locally)),
-                BASE_PATH=fname_helper.base_html_path(args.building_locally),
-                MEMORY_CONSUMPTION=MEMORY_CONSUMPTION.render())
+                TOP_LEVEL_HEADER=render_top_level_header(
+                    args.script_path, base_path),
+                BASE_PATH=base_path,
+                MEMORY_CONSUMPTION=render_memory_consumption())
             html_file.write(html)
         else:
             # In single-process tests, need 'index.html' to be picked up.
diff --git a/python/PRConfig/bandwidth_helpers.py b/python/PRConfig/bandwidth_helpers.py
index 8bddb618..13b8d6ff 100644
--- a/python/PRConfig/bandwidth_helpers.py
+++ b/python/PRConfig/bandwidth_helpers.py
@@ -12,6 +12,53 @@
 import os
 
 import yaml
 
+KNOWN_WORKING_GROUPS = [
+    "B2CC",
+    "B2OC",
+    "BandQ",
+    "BnoC",
+    "Calib",
+    "Calo",
+    "Charm",
+    "DPA",
+    "HLT",
+    "IFT",
+    "Luminosity",
+    "PID",
+    "QCD",
+    "QEE",
+    "RD",
+    "RTA",
+    "SLB",
+    "Topo",
+    "Tagging",
+    "Tracking",
+    "TrackEff",
+    "HadInt",
+    "Monitoring",
+    "CutBasedDiLep",
+    "InclDetDiLep",
+]
+
+CUSTOM_WGS = {
+    "Hlt2Topo": "Topo",
+    "Hlt2CutBasedIncl": "CutBasedDiLep",
+    "Hlt2_InclDet": "InclDetDiLep"
+}
+
+
+def guess_wg(line_name, process):
+    # First, assume the line name follows the convention
+    # <Hlt2|Spruce><WG>_MyLine
+    line_prefix = line_name.split("_")[0].removeprefix(process.capitalize())
+    if line_prefix in KNOWN_WORKING_GROUPS:
+        return line_prefix
+
+    # Otherwise, fall back to a handful of known non-conforming prefixes
+    for guess, custom_wg in CUSTOM_WGS.items():
+        if line_name.startswith(guess):
+            return custom_wg
+
+    return "Other"  # If you made it here, all guesses failed
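Example behaviour of `guess_wg` as defined above (the line names are invented for illustration; note that `str.removeprefix` requires Python 3.9 or later):

```python
# All of these follow directly from the definition above; the line names
# themselves are invented.
assert guess_wg("Hlt2B2CC_BsToJpsiPhi", "hlt2") == "B2CC"  # naming convention
assert guess_wg("SpruceRD_BuToKpEE", "spruce") == "RD"     # naming convention
assert guess_wg("Hlt2Topo2Body", "hlt2") == "Topo"         # CUSTOM_WGS prefix
assert guess_wg("Hlt2UnconventionalName", "hlt2") == "Other"  # fallback
```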
 
 
 def parse_yaml(file_path):
     with open(os.path.expandvars(file_path), 'r') as f:
@@ -147,12 +194,30 @@ class FileNameHelper(object):
             self._join(self.process, stream_config, "overlap_matrix") +
             ".html")
 
+    def intra_stream_jaccard_similarities_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, stream_config, stream,
+                       "jaccard_similarity_matrix") + ".html")
+
+    def intra_stream_overlap_matrix_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, stream_config, stream, "overlap_matrix")
+            + ".html")
+
     def event_no_fname(self, stream_config, stream):
         return os.path.join(
             self.base_dir, self.output_subdir,
             self._join(self.process, "event_numbers", stream_config, stream)
             + ".json")
 
+    def intra_stream_event_no_fname(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, "event_numbers_intra_stream",
+                       stream_config, stream) + ".json")
+
     def _tmp_rate_table_path(self, stream_config, stream, line_or_stream):
         return os.path.join(
             self.base_dir, self.output_subdir, self.inter_subsubdir,
@@ -165,6 +230,12 @@ class FileNameHelper(object):
     def tmp_rate_table_per_stream_path(self, stream_config, stream):
         return self._tmp_rate_table_path(stream_config, stream, "stream")
 
+    def tmp_rate_table_intra_stream_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir, self.inter_subsubdir,
+            self._join(self.process, f"rates_wgs_within_{stream}",
+                       stream_config, stream) + ".csv")
+
     def final_rate_table_all_lines_path(self, ext="html", full_path=True):
         fname = self._join(self.process, "rates_for_all_lines") + f".{ext}"
         return os.path.join(self.base_dir, self.output_subdir,
@@ -195,3 +266,9 @@ class FileNameHelper(object):
     def process_dependent_html_page_outputs_path(self, fname):
         return os.path.join(self.base_dir, self.output_subdir,
                             self._join(self.process, fname))
+
+    def bar_chart_path(self, main_stream_config, stream, metric):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, f'{metric}_bar_chart', main_stream_config,
+                       stream) + ".png")
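Hypothetical usage of the new helpers; the exact on-disk layout depends on `base_dir`, `output_subdir` and `_join`, which are defined outside this patch, and the stream name here is illustrative:

```python
# Hypothetical usage; exact paths depend on FileNameHelper internals
# (base_dir, output_subdir, _join), which are defined outside this patch.
from PRConfig.bandwidth_helpers import FileNameHelper, guess_wg

fname_helper = FileNameHelper("hlt2")
print(fname_helper.tmp_rate_table_intra_stream_path("production", "turbo"))
print(fname_helper.bar_chart_path("production", "turbo", "rate"))
print(guess_wg("Hlt2Charm_DstToD0Pi", "hlt2"))  # -> "Charm"
```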
CONFIG_FILE="${TEST_PATH_PREFIX}spruce_bandwidth_input.yaml" @@ -221,11 +221,9 @@ fi # 1. Run Moore. # -d downloads the input files locally for speed-up running Moore. Not helpful unless that download is fast for you (e.g. you're at CERN) -for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do - echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_FILE}" - time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py" - STORE_ERR_CODE -done +echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_FILE}" +time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py" +STORE_ERR_CODE # 2. Compute line descriptives: persist reco, extra output if [ $PROCESS = "hlt1" ]; then @@ -236,39 +234,47 @@ else STORE_ERR_CODE fi -for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do - # 3. Work out what the streams are from the config JSON; needed for later steps - STREAM_CONFIG_JSON_PATH=(`python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.stream_config_json_path('${STREAM_CONFIG}'))"`) - STREAM_STR=`(jq -r 'keys | @sh' ${STREAM_CONFIG_JSON_PATH})` - declare -a STREAMS="($STREAM_STR)" - echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}" - - # 4. Compute similarity matrices between streams by comparing event numbers - if [ $PROCESS = "hlt1" ]; then - echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"' - else - echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration" - for stream in "${STREAMS[@]}"; do - echo "Stream name: ${stream}" - time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream --file-type $OUTPUT_TYPE - STORE_ERR_CODE - done - time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]} - STORE_ERR_CODE - fi +# 3. Work out what the streams are from the config JSON; needed for later steps +STREAM_CONFIG_JSON_PATH=(`python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.stream_config_json_path('${STREAM_CONFIG}'))"`) +STREAM_STR=`(jq -r 'keys | @sh' ${STREAM_CONFIG_JSON_PATH})` +declare -a STREAMS="($STREAM_STR)" +echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}" - # 5. Computing rates per stream as well as per line (tables split by stream) - echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration" +# 4. 
-  # 5. Computing rates per stream as well as per line (tables split by stream)
-  echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
+# 4. Compute similarity matrices between streams by comparing event numbers
+if [ $PROCESS = "hlt1" ]; then
+  echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
+else
+  echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
   for stream in "${STREAMS[@]}"; do
     echo "Stream name: ${stream}"
-    time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -p $PROCESS -s $stream --stream-config $STREAM_CONFIG --file-type $OUTPUT_TYPE
+    time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream --file-type $OUTPUT_TYPE
     STORE_ERR_CODE
   done
+  time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py inter_stream --streams ${STREAMS[@]} -p $PROCESS --stream-config $STREAM_CONFIG
+  STORE_ERR_CODE
+fi
+
+# 5. Computing rates per stream as well as per line (tables split by stream)
+echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
+for stream in "${STREAMS[@]}"; do
+  echo "Stream name: ${stream}"
+  time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -p $PROCESS -s $stream --stream-config $STREAM_CONFIG --file-type $OUTPUT_TYPE
+  STORE_ERR_CODE
 done
 
+# 5b. Compute intra-stream overlap between WGs (only really envisaged for HLT2, where streams != WGs)
+if [ $PROCESS = "hlt2" ]; then
+  echo 'Computing intra-stream WG overlaps in each production stream'
+  for stream in "${STREAMS[@]}"; do
+    echo "Stream name: ${stream}"
+    time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py intra_stream --stream $stream -p $PROCESS --stream-config $STREAM_CONFIG
+    STORE_ERR_CODE
+  done
+fi
+
 # 6. Combine all output into tables
 echo 'Combining all rate and bandwidth tables'
-time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS
+time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS --stream-config $STREAM_CONFIG
 STORE_ERR_CODE
 
 # 7. Required information for 'hlt2-output-locally' or 'hlt2-output-from-eos' sprucing jobs.
@@ -278,6 +284,10 @@ if [ $PROCESS = "hlt2" ] && [ $INPUTDATA = "nominal" ]; then
   STORE_ERR_CODE
 fi
 
-# 8. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
+# 8. Copy the stream config JSON from tmp/MDF -> tmp/Output so the handler can pick it up. Bit of a hack
+cp $STREAM_CONFIG_JSON_PATH tmp/Output/
+STORE_ERR_CODE
+
+# 9. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
 echo 'Making plots and HTML pages'
-time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE $TOP_LEVEL_FLAG $TEST_PAGE_EXTRA_OPTS
+time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE --streams ${STREAMS[@]} --stream-config $STREAM_CONFIG $TOP_LEVEL_FLAG $TEST_PAGE_EXTRA_OPTS
-- 
GitLab