From 8d00d48c1aa472f763406f77d7f11396ec5f193f Mon Sep 17 00:00:00 2001 From: Ross John Hunter <ross.john.hunter@cern.ch> Date: Sat, 17 Feb 2024 08:42:54 +0100 Subject: [PATCH] [RTA/DPA BW TESTS]: Give the bandwidth test html pages a makeover Co-authored-by: Luke Grazette <l.grazette@warwick.ac.uk> --- python/MooreTests/calculate_stream_overlap.py | 83 +- python/MooreTests/combine_rate_output.py | 54 +- python/MooreTests/line-and-stream-rates.py | 332 ++++--- python/MooreTests/make_bandwidth_test_page.py | 925 +++++++++--------- python/PRConfig/bandwidth_helpers.py | 77 ++ .../benchmark-scripts/Moore_bandwidth_test.sh | 78 +- 6 files changed, 846 insertions(+), 703 deletions(-) diff --git a/python/MooreTests/calculate_stream_overlap.py b/python/MooreTests/calculate_stream_overlap.py index 86fa7049..777b342e 100755 --- a/python/MooreTests/calculate_stream_overlap.py +++ b/python/MooreTests/calculate_stream_overlap.py @@ -30,23 +30,21 @@ def get_all_event_numbers(args): return ret -def get_event_number_matrix(event_numbers_by_stream): +def get_event_number_matrix(event_numbers): all_event_numbers = set([ - evt_no for evt_no_list in event_numbers_by_stream.values() - for evt_no in evt_no_list + event_info for event_info_list in event_numbers.values() + for event_info in event_info_list ]) print( - f"Found {len(all_event_numbers)} unique event numbers across {len(event_numbers_by_stream.keys())} streams." + f"Found {len(all_event_numbers)} unique event numbers across {len(event_numbers.keys())} categories/streams." ) df = pd.DataFrame( - False, - index=list(all_event_numbers), - columns=event_numbers_by_stream.keys()) - for stream, evt_no_list in event_numbers_by_stream.items(): - for evt_no in evt_no_list: - df[stream][evt_no] = True + False, index=list(all_event_numbers), columns=event_numbers.keys()) + for stream, event_info_list in event_numbers.items(): + for event_info in event_info_list: + df[stream][event_info] = True return df @@ -84,44 +82,61 @@ def save(df, htmlpath): def main(): parser = argparse.ArgumentParser() - parser.add_argument( - '-p', - '--process', - type=str, - help='Compute for Hlt2 or Sprucing lines', - choices=['hlt2', 'spruce'], - required=True) - parser.add_argument( - '--stream-config', - type=str, - help='Choose production or per-WG stream configuration', - choices=['production', 'wg'], - required=True) - parser.add_argument('--streams', nargs='+', type=str, required=True) + subparsers = parser.add_subparsers(help='Mode of execution', dest='mode') + inter_stream_parser = subparsers.add_parser("inter_stream") + inter_stream_parser.add_argument( + '--streams', nargs='+', type=str, required=True) + + intra_stream_parser = subparsers.add_parser("intra_stream") + intra_stream_parser.add_argument('--stream', type=str, required=True) + for sp in [inter_stream_parser, intra_stream_parser]: + sp.add_argument( + '-p', + '--process', + type=str, + help='Compute for Hlt2 or Sprucing lines', + choices=['hlt2', 'spruce'], + required=True) + sp.add_argument( + '--stream-config', + type=str, + help='Choose production or per-WG stream configuration', + choices=['production', 'wg'], + required=True) args = parser.parse_args() + fname_helper = FileNameHelper(args.process) - event_numbers = get_all_event_numbers(args) - for stream in args.streams: - print( - f"Found {len(event_numbers[stream])} events for {stream} stream.") + if args.mode == "inter_stream": + similarities_path = fname_helper.jaccard_similarities_path( + args.stream_config) + overlaps_path = 
fname_helper.overlap_matrix_path(args.stream_config) + event_numbers = get_all_event_numbers(args) + elif args.mode == "intra_stream": + similarities_path = fname_helper.intra_stream_jaccard_similarities_path( + args.stream_config, args.stream) + overlaps_path = fname_helper.intra_stream_overlap_matrix_path( + args.stream_config, args.stream) + with open( + fname_helper.intra_stream_event_no_fname( + args.stream_config, args.stream), 'r') as f: + event_numbers = json.load(f) df = get_event_number_matrix(event_numbers) - ofile = fname_helper.jaccard_similarities_path(args.stream_config) sim_matrix = calculate_similarity_matrix(df) print( - f"Calculated similarity matrix. Printing and saving to html at {ofile}." + f"Calculated similarity matrix. Printing and saving to html at {similarities_path}." ) print(sim_matrix) - save(sim_matrix, ofile) + save(sim_matrix, similarities_path) - ofile = fname_helper.overlap_matrix_path(args.stream_config) overlap_matrix = calculate_overlap_matrix(df) print( - f"Calculated overlap matrix. Printing and saving to html at {ofile}.") + f"Calculated overlap matrix. Printing and saving to html at {overlaps_path}." + ) print(overlap_matrix) - save(overlap_matrix, ofile) + save(overlap_matrix, overlaps_path) if __name__ == "__main__": diff --git a/python/MooreTests/combine_rate_output.py b/python/MooreTests/combine_rate_output.py index 8497bbc1..5284e580 100755 --- a/python/MooreTests/combine_rate_output.py +++ b/python/MooreTests/combine_rate_output.py @@ -46,15 +46,15 @@ def _columns_per_line(process): } -def _sorted_df_by_retention(df): +def _sorted_df_by_bandwidth(df): return df.sort_values( - by=['Total Retention (%)'], ascending=False).reset_index(drop=True) + by=['Total Bandwidth (GB/s)'], ascending=False).reset_index(drop=True) -def rates_all_lines(stream_config, fname_helper, process): +def rates_all_lines(stream_config: str, fname_helper: FileNameHelper, + process: str): """Make 1 enormous table with rate/bw info per line for all lines in all streams (i.e. n_rows = n_lines). Saves to .csv and .html. - stream_config is either "production" or "wg" """ frames = [] @@ -67,7 +67,7 @@ def rates_all_lines(stream_config, fname_helper, process): df = pd.concat(frames) df.columns = _columns_per_line(process).keys() - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) df.to_csv(fname_helper.final_rate_table_all_lines_path("csv")) def highlight_vals(val, threshold, color='red'): @@ -93,11 +93,11 @@ def rates_all_lines(stream_config, fname_helper, process): return -def make_rate_table_row_per_line(stream_config, fname_helper, process): +def make_rate_table_row_per_line(stream_config: str, + fname_helper: FileNameHelper, process: str): """ Makes (1 table with rate/bw info per line in the streamed mdf) for all <stream_config> streams (i.e. n_tables = n_streams). Puts them all on 1 html page, adds hyperlinks to jump to the different streams on the page. Saves to .html page only. - stream_config is either "production" or "wg" """ with open( @@ -109,8 +109,15 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): fname_helper.get_stream_from_bw_path(file): file for file in files } - f.write('<head></head>\n<body>\n<p>') - f.write('Jump to:\n<ul>') + f.write('<head></head>\n<body>\n') + f.write(""" + <p> + Rates, event sizes and bandwidths of all lines in each stream, listed descending in bandwidth. <br> + Exclusive retentions/rates are calculated by counting those events in which only that line fired. 
<br> + Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br> + </p> + """) + f.write('<p>Jump to:\n<ul>') for stream in files_by_stream.keys(): f.write( f'<li><a href="#{stream}_label"> {stream.upper()}</a></li>') @@ -121,7 +128,7 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): f.write(f'<a id="{stream}_label">') df = pd.read_csv(file, header=None) df.columns = _columns_per_line(process).keys() - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) f.write( df.style.format( '{:.3g}', subset=df.columns[df.columns != 'Line']). @@ -132,10 +139,10 @@ def make_rate_table_row_per_line(stream_config, fname_helper, process): return -def make_rate_table_row_per_stream(stream_config, fname_helper, process): +def make_rate_table_row_per_stream(stream_config: str, + fname_helper: FileNameHelper): """ Makes 1 table with rate/bw info integrated over the whole streamed mdf for all <stream_config> streams (i.e. a table with n_rows = n_streams). Saves to .html and .csv. - stream_config is either "production" or "wg" """ frames = [] @@ -147,7 +154,7 @@ def make_rate_table_row_per_stream(stream_config, fname_helper, process): df = pd.concat(frames) df.columns = COLUMNS_PER_STREAM - df = _sorted_df_by_retention(df) + df = _sorted_df_by_bandwidth(df) df.to_csv( fname_helper.final_rate_table_all_streams_path( stream_config, ext="csv")) @@ -173,17 +180,16 @@ if __name__ == "__main__": help='Compute for Hlt1, Hlt2 or Sprucing lines', choices=['hlt1', 'hlt2', 'spruce'], required=True) + parser.add_argument( + '--stream-config', + type=str, + choices=['streamless', 'production', 'wg'], + required=True) args = parser.parse_args() fname_helper = FileNameHelper(args.process) - stream_configs, main_stream_config = { - "hlt1": (["streamless"], "streamless"), - "hlt2": (["production", "wg"], "production"), - "spruce": (["wg"], "wg") - }[args.process] - - rates_all_lines(main_stream_config, fname_helper, args.process) - for stream_config in stream_configs: - make_rate_table_row_per_stream(stream_config, fname_helper, - args.process) - make_rate_table_row_per_line(stream_config, fname_helper, args.process) + + rates_all_lines(args.stream_config, fname_helper, args.process) + make_rate_table_row_per_stream(args.stream_config, fname_helper) + make_rate_table_row_per_line(args.stream_config, fname_helper, + args.process) diff --git a/python/MooreTests/line-and-stream-rates.py b/python/MooreTests/line-and-stream-rates.py index d8b754cb..fd1f9cce 100644 --- a/python/MooreTests/line-and-stream-rates.py +++ b/python/MooreTests/line-and-stream-rates.py @@ -13,45 +13,66 @@ import GaudiPython as GP from GaudiConf.reading import do_unpacking from Configurables import (ApplicationMgr, LHCbApp, IODataManager, EventSelector, LHCb__UnpackRawEvent, - HltDecReportsDecoder) + HltDecReportsDecoder, createODIN) from GaudiConf import IOHelper from PyConf.application import configured_ann_svc -import operator -from collections import Counter import json import argparse import csv -from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml +from pprint import pprint +from dataclasses import dataclass, field +from typing import List, Dict +from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS ''' + Run over MDF or DST file to compute: - When running production-stream config, returns: - - Per line (in form of single HTML table): + Per line (in form of single .csv table): 1. 
Inclusive retention 2. Inclusive rate 3. Exclusive retention 4. Exclusive rate - 5. Average DstData bank size - 6. DstData bandwidth - 7. Average event size (all banks in particular stream) - 8. Bandwidth - - Per stream in Turbo/Full/Turcal - 1. Inclusive retention - 2. Inclusive rate - 3. Average DstData bank size - 4. DstData bandwidth 5. Average event size (all banks in particular stream) 6. Bandwidth + 7. Average DstData bank size + 8. DstData bandwidth - When running wg-stream config, returns same figures as above (both per line and per stream) - When running streamless-stream config, returns just the per-line information. - + Per stream (i.e. the whole file) + 1. Inclusive retention + 2. Inclusive rate + 3. Average event size (all banks in particular stream) + 4. Bandwidth + 5. Average DstData bank size + 6. DstData bandwidth ''' LHCb = GP.gbl.LHCb RAW_BANK_TYPES = [(i, LHCb.RawBank.typeName(i)) for i in range(LHCb.RawBank.LastType)] +B_to_kB = 1e-3 +MBps_to_GBps = 1e-3 + + +@dataclass +class LineBW: + triggered: List[int] = field( + default_factory=lambda: []) # list of ~event numbers + raw: float = 0 # Stores whole event size for every event that fires this line (inclusive) + dst: float = 0 # Just DstData raw bank size, as above + + +@dataclass +class FileBW: + triggered: Dict[int, int] = field( + default_factory=lambda: dict()) # {event_no: n_times_triggered} + raw: float = 0 + dst: float = 0 + + +@dataclass +class WGBW: + n_triggered: int = 0 + raw: float = 0 + dst: float = 0 def rawbank_sizes(rawevent, lst): @@ -68,52 +89,33 @@ def rawbank_sizes(rawevent, lst): return [(name, size(i)) for i, name in lst] -def processing_events_per_line_and_stream(evt_max, lines, process, stream): +def rate_info_from_file(evt_max, lines, process, stream): + '''Loop through the file to work out per-line and per-stream triggers and + event sizes. Outputs used by functions below to compute rates and bandwidths. 
''' - Returns, per line: - i) How many events triggered on - ii) How many unique events triggered on - iii) Average DstData size of all events - iv) Average size of all events - - Returns, per stream: - i) How many events triggered on - ii) Average DstData size of all events - iii) Average size of all events - ''' - # Per file (stream) information - events_file = 0 - raw_size_all = 0 - dst_size_all = 0 - - # Per line information - # Stores how many events each line fired on - event_stats = { - line: [] - for line in [line + 'Decision' for line in list(lines)] - } - - # Stores whole event size size - raw = {line: 0 for line in [line + 'Decision' for line in list(lines)]} + decisions = [line + 'Decision' for line in list(lines)] - # Stores DstData bank size - dst = {line: 0 for line in [line + 'Decision' for line in list(lines)]} + totals = FileBW() # Per file (stream) information + line_totals = {dec: LineBW() for dec in decisions} # per line - exclusive = {} + # This block only used for hlt2 + event_info = {wg: set() for wg in KNOWN_WORKING_GROUPS} + event_info['Other'] = set() + event_info['TotalInclusive'] = set() + unknown_lines = set() # Loop over all events analysed = 0 while analysed < evt_max: analysed += 1 - exclusive.update({analysed: 0}) - # Run an event appMgr.run(1) report = evt[f'/Event/{process.capitalize()}/DecReports'] # Sprucing has stream-dependent raw event locations - different to HLT1/2 rawevent = evt[ f'/Event/{stream if process == "spruce" else "DAQ"}/RawEvent'] + header = evt["/Event/myODIN"] evtsize = sum( bank[1] for bank in rawbank_sizes(rawevent, RAW_BANK_TYPES)) @@ -122,103 +124,135 @@ def processing_events_per_line_and_stream(evt_max, lines, process, stream): # Will quit running if there are no more events in the input file if report: - # Count per file/stream - events_file += 1 - raw_size_all += evtsize - dst_size_all += dstsize - for line in event_stats.keys(): - # Count per line - if report.decReport(line): - if report.decReport(line).decision() == 1: - event_stats[line].append(analysed) - exclusive[analysed] += 1 - raw[line] += evtsize - dst[line] += dstsize + # Info per file/stream + totals.triggered.update({analysed: 0}) + totals.raw += evtsize + totals.dst += dstsize + for dec in line_totals.keys(): + # Info per line + if report.decReport(dec) and report.decReport( + dec).decision() == 1: + totals.triggered[analysed] += 1 + line_totals[dec].triggered.append(analysed) + line_totals[dec].raw += evtsize + line_totals[dec].dst += dstsize + + # Info per WG within the stream + if process == "hlt2": + info = (header.eventNumber(), evtsize, dstsize) + event_info['TotalInclusive'].add(info) + wg = guess_wg(dec, process) + event_info[wg].add(info) + if wg == "Other": + unknown_lines.add(dec) else: break - # First three variables per stream/file, last four for lines - return events_file, raw_size_all, dst_size_all, event_stats, exclusive, raw, dst - - -def rates_per_line(event_stats, exclusive, raw, dst, input_rate, - output_file_path): - - data = [] - - # Compute exclusive rate - sort = dict( - sorted( - {k: v - for (k, v) in exclusive.items() if v > 0}.items(), - key=operator.itemgetter(1), - reverse=True)) - - unique_events = [key for key, value in sort.items() if value == 1] - - for line, val in event_stats.items(): - events_all = val + unique_events - num_events = len(event_stats[line]) - row_values = ( - line, - num_events / LHCbApp().EvtMax * 100 - if num_events else 0, # Inclusive Retention (expressed as %) - num_events / LHCbApp().EvtMax * input_rate - if 
num_events else 0,  # Inclusive Rate (in kHz)
-            len([
-                key for key, value in Counter(events_all).items() if value > 1
-            ]) / LHCbApp().EvtMax * 100
-            if num_events else 0,  # Exclusive retention (expressed as %)
-            len([
-                key for key, value in Counter(events_all).items() if value > 1
-            ]) / LHCbApp().EvtMax * input_rate
-            if num_events else 0,  # Exclusive rate (in kHz)
-            raw[line] / num_events * 1e-3
-            if num_events else 0,  # Average event size (in kB)
-            (num_events / LHCbApp().EvtMax * raw[line] / num_events) *
-            input_rate / 1e6 if num_events else 0,  # Event bandwidth (in GB/s)
-            dst[line] / len(event_stats[line]) * 1e-3
-            if num_events else 0,  # Average DstData size (in kB)
-            (num_events / LHCbApp().EvtMax * dst[line] / num_events) *
-            input_rate / 1e6 if num_events else 0
-        )  # DstData Bandwidth (in GB/s)
-
-        data.append(row_values)
-
-    with open(output_file_path, 'w') as f:
+
+    if len(unknown_lines):
+        print(f"Found {len(unknown_lines)} unknown lines. They are...")
+        pprint(unknown_lines)
+
+    # Remove trigger-less WGs, and transform out of tuple for easier & more-robust lookups later
+    wg_event_numbers = {}
+    wg_bws = {}
+    if process == "hlt2":
+        for wg, event_info_set in event_info.items():
+            event_numbers = [info[0] for info in event_info_set]
+            if len(event_numbers) == 0: continue
+
+            wg_event_numbers[wg] = event_numbers
+            wg_bws[wg] = WGBW(
+                n_triggered=len(event_numbers),
+                raw=sum([info[1] for info in event_info_set]),
+                dst=sum([info[2] for info in event_info_set]))
+
+    return totals, line_totals, wg_bws, wg_event_numbers
+
+
+def _write_to_csv(output_path, rows):
+    with open(output_path, 'w') as f:
         csv_out = csv.writer(f)
-        for tup in data:
-            csv_out.writerow(tup)
+        for row in rows:
+            csv_out.writerow(row)
+
+
+def rates_per_line(line_totals: dict[str, LineBW], file_totals: FileBW,
+                   input_rate: float, output_file_path: str) -> None:
+
+    rows = []
+    for line, bw_info in line_totals.items():
+
+        n_events = LHCbApp().EvtMax
+        n_fired = len(bw_info.triggered)
+        retention = n_fired / n_events
+        rate = retention * input_rate  # kHz, inclusive
+
+        n_fired_excl = len([
+            evt for evt in bw_info.triggered if file_totals.triggered[evt] == 1
+        ])
+        excl_retention = n_fired_excl / n_events
+        excl_rate = excl_retention * input_rate  # kHz
+
+        avg_evt_size = bw_info.raw * B_to_kB / n_fired if n_fired else 0  # kB
+        avg_dst_size = bw_info.dst * B_to_kB / n_fired if n_fired else 0  # kB
+        bw = rate * avg_evt_size * MBps_to_GBps  # GB/s
+        dst_bw = rate * avg_dst_size * MBps_to_GBps  # GB/s
+
+        rows.append([
+            line, retention * 100, rate, excl_retention * 100, excl_rate,
+            avg_evt_size, bw, avg_dst_size, dst_bw
+        ])
+
+    _write_to_csv(output_file_path, rows)
     return
 
 
-def rates_per_stream(events, raw_size, dst_size, streamname, input_rate,
-                     output_file_path):
+def rates_per_stream(file_totals: FileBW, stream: str, input_rate: float,
+                     output_file_path: str) -> None:
 
-    data = []
+    n_events = LHCbApp().EvtMax
+    n_fired = len(file_totals.triggered)
+    retention = n_fired / n_events
+    rate = retention * input_rate
 
-    row_values = (
-        streamname,
-        events / LHCbApp().EvtMax * 100
-        if events else 0,  # Inclusive Retention (expressed as %)
-        events / LHCbApp().EvtMax * input_rate
-        if events else 0,  # Inclusive Rate (in kHz)
-        raw_size / events * 1e-3
-        if events else 0,  # Average event size (in kB)
-        (events / LHCbApp().EvtMax * raw_size / events) * input_rate / 1e6
-        if events else 0,  # Event bandwidth (in GB/s)
-        dst_size / events * 1e-3
-        if events else 0,  # Average DstData size (in kB)
-        (events / LHCbApp().EvtMax * dst_size / events) * input_rate / 1e6
-        if events else 0)  # DstData Bandwidth (in GB/s)
+    avg_evt_size = file_totals.raw * B_to_kB / n_fired if n_fired else 0
+    avg_dst_size = file_totals.dst * B_to_kB / n_fired if n_fired else 0
 
-    data.append(row_values)
+    bw = rate * avg_evt_size * MBps_to_GBps  # GB/s
+    dst_bw = rate * avg_dst_size * MBps_to_GBps  # GB/s
 
-    with open(output_file_path, 'w') as f:
-        csv_out = csv.writer(f)
-        for tup in data:
-            csv_out.writerow(tup)
+    row = (stream, retention * 100, rate, avg_evt_size, bw, avg_dst_size,
+           dst_bw)
+    _write_to_csv(output_file_path, [row])
+    return
+
+
+def rates_per_wg_intra_stream(wg_bws: dict[str, WGBW], input_rate: float,
+                              output_file_path: str) -> None:
+    # Get inclusive rates/bandwidths of each WG within this stream
+
+    n_events = LHCbApp().EvtMax
+    wg_bw_infos = dict()
+    for wg, bw_info in wg_bws.items():
+        rate = bw_info.n_triggered * input_rate / n_events  # kHz
+        wg_bw_infos[wg] = [
+            rate,
+            input_rate * (bw_info.raw * B_to_kB) * MBps_to_GBps /
+            n_events,  # bandwidth, GB/s
+            input_rate * (bw_info.dst * B_to_kB) * MBps_to_GBps /
+            n_events,  # dst bandwidth, GB/s
+        ]
+
+    n_metrics = len(wg_bw_infos['TotalInclusive'])
+    wg_bw_infos['SumWGs'] = [
+        sum(bw_info[i] for wg, bw_info in wg_bw_infos.items()
+            if wg != "TotalInclusive") for i in range(n_metrics)
+    ]
+    rows = [[wg] + bw_info for wg, bw_info in wg_bw_infos.items()]
+
+    _write_to_csv(output_file_path, rows)
     return
 
 
@@ -307,6 +341,11 @@ if __name__ == '__main__':
     # TODO do_unpacking will try to unpack HLT2 dec banks if spruce
     # leads to annoying ERROR messages that do nothing.
     algs = do_unpacking(**raw_event_unpacker_kwargs)
+    algs += [
+        LHCb__UnpackRawEvent(
+            BankTypes=['ODIN'], RawBankLocations=["DAQ/RawBanks/ODIN"]),
+        createODIN(ODIN="myODIN"),
+    ]
 
     appMgr = ApplicationMgr(TopAlg=algs)
     appMgr.ExtSvc += [
@@ -314,18 +353,31 @@ if __name__ == '__main__':
     ]
     appMgr = GP.AppMgr()
     evt = appMgr.evtsvc()
-    i_rate = int(input_config['input_rate'])
+    input_rate = int(input_config['input_rate'])
 
-    evts_all, rawbanks_all, dst_all, event_stats, exclusive, raw, dst = processing_events_per_line_and_stream(
+    file_totals, line_totals, wg_bws_for_hlt2, wg_event_numbers_for_hlt2 = rate_info_from_file(
         LHCbApp().EvtMax, lines, args.process, args.stream)
 
     # Calculate key quantities per stream
     rates_per_stream(
-        evts_all, rawbanks_all, dst_all, args.stream, i_rate,
+        file_totals, args.stream, input_rate,
         fname_helper.tmp_rate_table_per_stream_path(args.stream_config,
                                                     args.stream))
 
     # Calculate key quantities per line
     rates_per_line(
-        event_stats, exclusive, raw, dst, i_rate,
+        line_totals, file_totals, input_rate,
         fname_helper.tmp_rate_table_per_line_path(args.stream_config,
                                                   args.stream))
+
+    if args.process == "hlt2":
+        # Save the event numbers for WG-by-WG overlaps
+        event_number_file = fname_helper.intra_stream_event_no_fname(
+            args.stream_config, args.stream)
+        with open(event_number_file, 'w') as f:
+            json.dump(wg_event_numbers_for_hlt2, f)
+
+        # Calculate rates/bws within a stream
+        rates_per_wg_intra_stream(
+            wg_bws_for_hlt2, input_rate,
+            fname_helper.tmp_rate_table_intra_stream_path(
+                args.stream_config, args.stream))
diff --git a/python/MooreTests/make_bandwidth_test_page.py b/python/MooreTests/make_bandwidth_test_page.py
index cc79f147..52cd827d 100644
--- a/python/MooreTests/make_bandwidth_test_page.py
+++ b/python/MooreTests/make_bandwidth_test_page.py
@@ -12,45 +12,15 @@ import argparse
 import jinja2
 import matplotlib.pyplot as plt
 import pandas as pd
-import yaml
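
For orientation, a minimal sketch (not part of the patch) of the per-line arithmetic that the new rates_per_line implements, with hypothetical numbers; the unit constants mirror B_to_kB and MBps_to_GBps defined in line-and-stream-rates.py:

    B_to_kB = 1e-3       # bytes -> kB
    MBps_to_GBps = 1e-3  # kHz * kB (= MB/s) -> GB/s

    n_events = 100_000   # plays the role of LHCbApp().EvtMax
    input_rate = 1000.0  # kHz, as read from the input-config YAML
    n_fired = 2500       # events on which a hypothetical line fired
    sum_raw = n_fired * 50_000  # summed raw-bank size of those events, bytes

    retention = n_fired / n_events              # 0.025
    rate = retention * input_rate               # 25 kHz
    avg_evt_size = sum_raw * B_to_kB / n_fired  # 50 kB
    bw = rate * avg_evt_size * MBps_to_GBps     # 1.25 GB/s
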
 import os
 from math import log10
 from dataclasses import dataclass, field
 from typing import List
 from collections import namedtuple
-from PRConfig.bandwidth_helpers import FileNameHelper
+from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS
 
 plt.ioff()
 
-TOP_LEVEL_HEADER = jinja2.Template("""
-<p>
-    slot.build_id: $$version$$<br>
-    start time: $$start_time$$<br>
-    end time: $$end_time$$<br>
-    platform: $$platform$$<br>
-    hostname: $$hostname$$<br>
-    cpu_info: $$cpu_info$$<br>
-    testing script path: {{SCRIPTPATH}}
-</p>
-<ul>
-    <li><a href="{{BASE_PATH}}/run.log">Logs</a></li>
-</ul>
-""")
-
-MEMORY_CONSUMPTION = jinja2.Template("""
-<object type="image/png" data="memory_consumption.png"></object>
-<p>
-    Memory consumption as functions of Wall-time. <br>
-    The virtual memory size is the total amount of memory the process may hypothetically access. <br>
-    The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
-    The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
-    As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
-    Swap memory is used when RAM is full. <br>
-    The maximum resident set size usage is $$max_rss$$ GB. <br>
-    The maximum proportional set size usage is $$max_pss$$ GB. <br>
-</p>
-""")
-
 SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
 <html>
 <head></head>
@@ -60,25 +30,38 @@ SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
     <b>{{EXIT_CODE_SENTENCE}}</b>
 </p>
 <p>
-    Results per working group and stream:
-    <ul>
-        <li>Inclusive retention and rate</li>
-        <li>(Jaccard) similarity matrix</li>
-        <li>(Conditional) overlap matrix</li>
-        <li>Average DstData size and bandwidth</li>
-        <li>Average event size and bandwidth</li>
-    </ul>
+    This page contains the results of the {{PROCESS}} bandwidth test. Main results:
 </p>
+{{main_rate_table}}
 <p>
-    Results per line: all of the above, plus
-    <ul>
-        <li>Exclusive retention and rate</li>
-        <li>Descriptives (whether persistreco and/or extra outputs is enabled)</li>
-    </ul>
+    The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
+    <a href="{{BASE_PATH}}/{{stream_config_json}}">here</a>. <br>
+    This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
+    "DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) is saved. <br>
+    The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
+    The total event size (and total bandwidth) counts all raw banks (incl. DstData, and detector raw banks if present) in the file. <br>
 </p>
+<p> Scroll down to see: </p>
+<ul>
+    <li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
+    <li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
+    <li> Information about the input sample, </li>
+    <li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics, </li>
+    <li> Memory consumption of the test as a function of time. </li>
+</ul>
+<p>
+    Further results can be found in the links below:
+</p>
+<ul>
+    {{LIST_OF_LINKS}}
+    $${{PROCESS}}__comparison$$
+    </b></b>
+</ul>
 <p>
     See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
+{{BAR_CHARTS}}
+{{LINES_PER_WG}}
 <p>
-    Input sample information:
+    <b>Input sample information:</b>
     <ul>
         <li>Config file: {{INPUT_CONFIG_PATH}}</li>
         <li>Input rate: {{INPUT_RATE}} kHz</li>
@@ -86,46 +69,18 @@ SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
         <li>Radius of VELO opening: {{INPUT_VELO_RADIUS}} mm</li>
     </ul>
 </p>
-{{TEMPLATE}}
 <p>
-    Other results are shown by plots or tables (in the links) below. <br>
+    <b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
+    The total distributions are shown as a stacked histogram, split into several histograms of WGs. <br>
+    The distributions per WG are attached in the html page linked above. <br>
+    Total event size is calculated from summing all raw banks in the file (including DstData). <br>
+    Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank. <br>
 </p>
-{{LINES_PER_WG}}
+<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
 <object type="image/png" data="{{PROCESS}}__hist__rate.png"></object>
-<p>
-    Distribution of rate of selection lines. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    A line is considered to be "problematic" if it has a rate of 0 Hz
-    or larger than 1 kHz, which requires some attention. <br>
-    The rates of all lines are listed in a html page attached below. <br>
-</p>
-
-{{DST_DATA_HIST}}
-
 <object type="image/png" data="{{PROCESS}}__hist__total_size.png"></object>
-<p>
-    Distribution of total event size of selection lines. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    A line is considered to be "problematic" if its DstData size or total event size
-    is larger than 1 MB, which requires some attention. <br>
-    The event sizes of all lines are listed in a html page attached below. <br>
-</p>
-
-{{DST_BW_HIST}}
-
-<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
-<p>
-    Distribution of bandwidth computed from total event size. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below. <br>
-    Currently, a line is considered to be "problematic" if its bandwidth from DstData size
-    is larger than 200 MB/s, which requires some attention. This is a temporary limit. <br>
-    The event sizes of all lines are listed in a html page attached below. <br>
-</p>
+{{DST_DATA_HIST}}
 {{MEMORY_CONSUMPTION}}
-{{ALL_RESULTS}}
 </body>
 </html>
 """)
@@ -140,168 +95,13 @@ HLT2_AND_SPRUCE_REPORT_TEMPLATE = jinja2.Template("""
     The appropriate process-specific webpages can be found below. 
</p> <ul> - <li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 index</a></li> - <li><a href="{{BASE_PATH}}/spruce__index.html">Spruce index</a></li> + <li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 results</a></li> + <li><a href="{{BASE_PATH}}/spruce__index.html">Sprucing results</a></li> </ul> {{MEMORY_CONSUMPTION}} </body> </html>""") -HLT1_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under a single streamless configuration. <br> - The definition of the configuration can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">Streamless configuration</a></li> -</ul> -<p> - The streamless configuration is representative of data taking. <br> - The rates, event sizes and bandwidth results from the streamless configuration is: <br> -</p> -<p> -</p> -{{table_streamless_rates}}""") - -HLT2_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under 2 streaming configurations: production streams (Full, Turbo etc.) and one stream per WG. <br> - The definition of the production streaming and working-group streaming can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_prod}}">Production-stream configuration</a></li> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li> -</ul> -<p> - The production stream configuration reflects the streaming we will have for data taking. <br> - The rates, event sizes and bandwidths results from production-stream configuration is: <br> -</p> -{{table_5stream_rates}}""") - -SPRUCE_REPORT_TEMPLATE = jinja2.Template("""<p> - The bandwidth test was run under 1 streaming configuration: one stream per WG. <br> - The definition of per-WG-stream configuration can be found below. -</p> -<ul> - <li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li> -</ul> -<p> - The wg-stream configuration is close to what we will have for data taking. 
<br> - The rates, event sizes and bandwidths results from wg-stream configuration is: <br> -</p> -{{table_wgstream_rates}}""") - -HLT1_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ -</ul> -""") - -HLT2_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__other_lines.html">Show list of lines in "Other" category</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__plots_per_wg.html">Show plots split by WGs</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__similarity_matrices.html"> Show similarity Jaccards and overlap matrices between streams for different stream configurations</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__rates_streaming.html"> Show rates of streams under different configurations</a></li> - <li><a href="{{BASE_PATH}}/{{line_descr}}"> PersistReco and ExtraOutput for selection lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_wg_stream}}"> Split by working group: rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_prod_stream}}"> Split by production stream: rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ - </b></b> -</ul> -""") - -SPRUCING_ALL_RESULTS = jinja2.Template(""" -<ul> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__other_lines.html">Show list of lines in "Other" category</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__plots_per_wg.html">Show plots split by WGs</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__similarity_matrices.html"> Show similarity Jaccards and overlap matrices between streams for different stream configurations</a></li> - <li><a href="{{BASE_PATH}}/{{PROCESS}}__rates_streaming.html"> Show rates of streams under different configurations</a></li> - <li><a href="{{BASE_PATH}}/{{line_descr}}"> PersistReco and ExtraOutput for selection lines</a></li> - <li><a href="{{BASE_PATH}}/{{rate_table_split_by_wg_stream}}"> Split by working group: rates, event sizes and bandwidths of all lines</a></li> - $${{PROCESS}}__comparison$$ - </b></b> -</ul> -""") - -HLT1_LINES_PER_WG = jinja2.Template("""""") -HLT1_DST_DATA_HIST = jinja2.Template("""""") -HLT1_DST_BW_HIST = jinja2.Template("""""") - -HLT2_OR_SPRUCING_LINES_PER_WG = jinja2.Template(""" -<object type="image/png" data="{{PROCESS}}__lines_per_wg.png"></object> -<p> - The number of selection lines per working group. <br> - "Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br> - To make lines properly categorized, one should follow the naming convention, - name of lines should start with `Hlt1/Hlt2/Spruce[WG]_`. -</p> -""") - -HLT2_OR_SPRUCING_DST_DATA_HIST = jinja2.Template(""" -<object type="image/png" data="{{PROCESS}}__hist__dst_data_size.png"></object> -<p> - Distribution of DstData RawBank size of selection lines. <br> - The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br> - The distributions per WG is attached in the html page below. 
-</p>
-""")
-
-HLT2_OR_SPRUCING_DST_BW_HIST = jinja2.Template("""
-<object type="image/png" data="{{PROCESS}}__hist__dst_bandwidth.png"></object>
-<p>
-    Distribution of bandwidth computed from DstData RawBank size. <br>
-    The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
-    The distributions per WG is attached in the html page below.
-</p>
-""")
-
-TABLE_OTHER_LINE_TEMPLATE = jinja2.Template("""
-<p>
-    List of line names that categorized to "Others".
-</p>
-{{table_other_lines}}
-""")
-
-PLOTS_PER_WG_TEMPLATE = jinja2.Template("""
-<p>
-    Plots of rates, event sizes and bandwidths for lines, split into different WGs.
-</p>
-{{plots_per_wg}}
-""")
-
-ALL_RATE_TEMPLATE = jinja2.Template("""
-<p>
-    Rates, event sizes and bandwidths of all lines, listed descending in retention rates. <br>
-    The results are obtained by a per-event analysing under 5-stream configuration. <br>
-    These numbers are also saved in a csv file: <a href="{{BASE_PATH}}/{{CSV_PATH}}">{{CSV_PATH}}</a>
-</p>
-""")
-
-known_working_groups = [
-    "B2CC",
-    "B2OC",
-    "BandQ",
-    "BnoC",
-    "Calib",
-    "Calo",
-    "Charm",
-    "DPA",
-    "HLT",
-    "IFT",
-    "Luminosity",
-    "PID",
-    "QCD",
-    "QEE",
-    "RD",
-    "RTA",
-    "Simulation",
-    "SL",
-    "Tagging",
-    "Tracking",
-]
-
 
 @dataclass
 class WGRateBWInfo:
@@ -355,14 +155,13 @@ def histo_maker(entry_list,
 def make_plots_per_wg(fname_helper, wg_name, wg_bw_info, process):
     '''
-    Make plots of rates and event sizes for each WG.
+    Make plots of rates/bandwidths and event sizes for each WG.
 
     Arguments:
+        fname_helper: instance of FileNameHelper
         wg_name: name of the working group
-        rate_list: list containing rates of all lines from the WG
-        dst_size_list: list containing DstData Rawbank size of all lines from the WG
-        tot_size_list: list containing total event size of all lines from the WG
-        process: either `hlt2` or `spruce`
+        wg_bw_info: WGRateBWInfo object for this WG
+        process: `hlt1`, `hlt2` or `spruce`
     '''
 
     title = f"{wg_name} {process.capitalize()}"
@@ -381,52 +180,47 @@ def make_plots_per_wg(fname_helper, wg_name, wg_bw_info, process):
             f"hist__{plot_bit}__{wg_name}.png"))
 
 
-def make_plots(all_lines_bw_info,
-               tot_rate,
-               tot_bandwidth,
-               process,
-               wgs=known_working_groups):
+def make_plots(all_lines_bw_info, tot_rate, tot_bandwidth, fname_helper,
+               process):
     '''
-    Make plots of rate and event sizes of all lines.
-    It will create three stacked histograms containing distributions of all lines,
-    and a pie chart showing the number of lines per WG.
+    Make plots of rate, bandwidth and event sizes of all lines.
+    It will create 5 stacked histograms containing distributions of all lines
+    grouped by WG, and a pie chart showing the number of lines per WG.
 
     Arguments:
-        rate_dict: dictionary of line names and their rates
-        tot_rate: total rate of all lines
-        evt_size_dict: dictionary of line names and their event sizes
-        process: either `hlt2` or `spruce`
-        wgs: list of working groups to categorize
+        all_lines_bw_info: dict(line_name: LineRateBWInfo object)
+        tot_rate: total rate of all lines (arithmetic sum of stream rates)
+        tot_bandwidth: total bandwidth of all lines (arithmetic sum of stream BWs)
+        fname_helper: instance of FileNameHelper
+        process: `hlt1`, `hlt2` or `spruce`
+
+    Returns:
+        - list of found WGs with >= 1 line
+        - list of lines that didn't fit into 1 WG
     '''
 
     # Count number of lines and rates/evt sizes per WG
-    rate_info_per_wg = {wg: WGRateBWInfo() for wg in wgs + ["Other"]}
+    rate_info_per_wg = {
+        wg: WGRateBWInfo()
+        for wg in KNOWN_WORKING_GROUPS + ["Other"]
+    }
     list_other_lines = []
     for line, bw_info in all_lines_bw_info.items():
-        found_wg = False
-        # Expect e.g {Hlt1,Hlt2,Spruce}<WG>_<rest-of-line-name>
-        wg_guess = line.split("_")[0].removeprefix(process.capitalize())
-        for wg in rate_info_per_wg.keys():
-            if wg_guess.startswith(wg):
-                found_wg = True
-                rate_info_per_wg[wg].nlines += 1
-                for attrib in [
-                        "rate", "dst_size", "tot_size", "dst_bw", "tot_bw"
-                ]:
-                    getattr(rate_info_per_wg[wg], attrib).append(
-                        getattr(bw_info, attrib))
-        if not found_wg:
+        wg_guess = guess_wg(line, process)
+        rate_info_per_wg[wg_guess].nlines += 1
+        if wg_guess == "Other":
             list_other_lines.append(line)
-            rate_info_per_wg["Other"].nlines += 1
-            for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
-                getattr(rate_info_per_wg["Other"], attrib).append(
-                    getattr(bw_info, attrib))
+
+        for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
+            getattr(rate_info_per_wg[wg_guess], attrib).append(
+                getattr(bw_info, attrib))
+
     rate_info_per_wg = {
        k: info
        for k, info in rate_info_per_wg.items() if info.nlines != 0
    }
 
-    # Make a pie plot of lines per WG
+    # Make a pie chart of lines per WG
     labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
     fig = plt.figure()
     plt.pie([v.nlines for v in rate_info_per_wg.values()],
@@ -434,10 +228,13 @@
             labels=labels,
             wedgeprops=dict(width=0.4, edgecolor="w"))
     plt.title(f"Number of {process.capitalize()} lines per WG")
-    plt.savefig(f"tmp/Output/{args.process}__lines_per_wg.png", format="png")
+    plt.savefig(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "lines_per_wg.png"),
+        format="png")
     plt.close(fig)
 
-    ### Make hist plots
+    # Stacked histograms
     title = f"{process.capitalize()}"
     for attrib, xtitle, title, plot_bit, take_log, log_th, range in zip(
         ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"], [
@@ -473,39 +270,294 @@
     return rate_info_per_wg.keys(), list_other_lines
 
 
-def make_other_line_table(name_list):
-    table_html_str = r'''<table border = "1">
-    <tr>
-        <th> Name </th>
-    </tr>'''
-    for name in name_list:
-        table_html_str += '''
-    <tr>
-        <td> %s </td>
-    </tr>''' % name
-    table_html_str += '\n</table>'
-    return table_html_str
-
-
-def make_plots_per_wg_list(wg_list, process):
-    list_html_str = ''
-    for wg_name in wg_list:
-        list_html_str += f'''
+def make_bar_charts(rates_df, column, stream, plot_path):
+    """Bar charts of the WG-by-WG rates within 1 stream"""
+
+    fig = plt.figure()
+    plt.grid(True, axis='y', zorder=0, linestyle='dashed')
+    bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
+    plt.bar_label(bars, fmt='%.1f')
+    plt.ylabel(column)
+    plt.xticks(rates_df['WG'], 
rates_df['WG'], rotation='vertical') + plt.subplots_adjust(bottom=0.25) + plt.title(f'{column} for each WG in the {stream.capitalize()} stream') + plt.savefig(plot_path, format="png") + plt.close(fig) + + +def write_html_page(page_path, rendered_html): + if rendered_html: + with open(page_path, "w") as html_file: + html_file.write(rendered_html) + + +def _render(html_str): + return jinja2.Template(html_str).render() + + +def render_all_lines_page(fname_helper, building_locally): + csv_path = fname_helper.final_rate_table_all_lines_path( + "csv", full_path=False) + html_str = f""" + <p> + Rates, event sizes and bandwidths of all lines, listed descending in bandwidth. <br> + Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br> + Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br> + These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a> + </p> + """ + with open(fname_helper.final_rate_table_all_lines_path("html"), + "r") as rate_table: + html_str += rate_table.read() + return _render(html_str) + + +def render_top_level_header(script_path, base_path): + return _render(f""" + <p> + slot.build_id: $$version$$<br> + start time: $$start_time$$<br> + end time: $$end_time$$<br> + platform: $$platform$$<br> + hostname: $$hostname$$<br> + cpu_info: $$cpu_info$$<br> + testing script path: {script_path} + </p> + <ul> + <li><a href="{base_path}/run.log">Logs</a></li> + </ul> + """) + + +def render_memory_consumption(): + return _render(""" <p> - Plots of {wg_name} group: + <b> Memory consumption of this test: </b> </p> - <object type="image/png" data="{process}__hist__rate__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__dst_data_size__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__total_size__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__dst_bandwidth__{wg_name}.png"></object> - <object type="image/png" data="{process}__hist__tot_bandwidth__{wg_name}.png"></object> + <object type="image/png" data="memory_consumption.png"></object> + <p> + Memory consumption as functions of Wall-time. <br> + The virtual memory size is the total amount of memory the process may hypothetically access. <br> + The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br> + The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br> + As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br> + Swap memory is used when RAM is full. <br> + The maximum resident set size usage is $$max_rss$$ GB. <br> + The maximum proportional set size usage is $$max_pss$$ GB. <br> + </p> + """) + + +def render_other_line_table(process, lines): + if process == "hlt1": + return _render("") + + html_str = """ + <p> + List of line names that categorized to "Others". 
+    </p>
+    """
+    html_str += r'''<table border = "1">
+    <tr>
+        <th> Name </th>
+    </tr>'''
+    for line in lines:
+        html_str += f'''
+    <tr>
+        <td> {line} </td>
+    </tr>'''
+    html_str += '\n</table>'
+    return _render(html_str)
+
+
+def render_plots_per_wg_page(process, wgs):
+    if process == "hlt1":
+        return _render("")
+
+    html_str = """
+    <p>
+        Plots of rates, event sizes and bandwidths for lines, split into different WGs.
+    </p>
+    """
+    for wg in wgs:
+        html_str += f'''
+    <p>
+        Plots of {wg} group:
+    </p>
+    <object type="image/png" data="{process}__hist__rate__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__dst_data_size__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__total_size__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__dst_bandwidth__{wg}.png"></object>
+    <object type="image/png" data="{process}__hist__tot_bandwidth__{wg}.png"></object>
     '''
-    return list_html_str
+    return _render(html_str)
 
+def render_dst_data_hists(process):
+    if process == "hlt1":
+        return _render("")
 
-def parse_yaml(file_path):
-    with open(os.path.expandvars(file_path), 'r') as f:
-        return yaml.safe_load(f)
+    html_str = ''
+    for hist_suffix in ("data_size", "bandwidth"):
+        html_str += f"""
+    <object type="image/png" data="{process}__hist__dst_{hist_suffix}.png"></object>
+    """
+    return _render(html_str)
+
+
+def render_lines_pie_chart(process):
+    if process == "hlt1":
+        return _render("")
+    return _render(f"""
+    <p>
+        <b>The number of selection lines per working group:</b> <br>
+    </p>
+    <object type="image/png" data="{process}__lines_per_wg.png"></object>
+    <p>
+        The "Other" category contains those lines whose parsed name doesn't belong to any known WG. <br>
+        To have lines categorized properly, one should follow the naming convention:
+        line names should start with `Hlt2/Spruce[WG]_`.
+    </p>
+    """)
+
+
+def render_bar_charts(process,
+                      stream_config,
+                      streams,
+                      metrics=('bandwidth', 'rate')):
+    if process != "hlt2":
+        return _render("")
+
+    html_str = ''
+    for metric in metrics:
+        html_str += f'''
+    <p>
+        <b>{metric.capitalize()} within each stream:</b>
+    </p>
+    <p>
+        "TotalInclusive" is the physical rate/bandwidth of the stream. "SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive".<br>
+        The difference between the two bars gives us information about the degree of WG-by-WG overlap.
+    </p>
+    '''
+        if stream_config == "production":
+            html_str += f'''
+    <p>
+        <b>Note:</b> The WG bars in the HLT2 Turbo stream correspond almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
+        Furthermore, this means <b>the "SumWGs" {metric} bar of HLT2 turbo is equal to the total physical {metric} of Turbo post-sprucing.</b><br>
+    </p>
+    '''
+        for stream in streams:
+            html_str += f'''
+    <object type="image/png" data="{process}__{metric}_bar_chart__{stream_config}__{stream}.png"></object>
+    '''
+    html_str += '''
+    <p>
+        Rates for a WG within a stream are calculated by counting the number of events saved to that stream in which at least 1 of that WG's lines fired.<br>
+        Bandwidths for a WG are calculated by summing the event size of all events saved to the stream in which at least 1 of that WG's lines fired.<br>
+    </p>
+    '''
+    return _render(html_str)
+
+
+def render_extra_sim_matrices(process, stream_config, streams):
+    if process != "hlt2":
+        return _render("")
+
+    html_str = """
+    <p>
+        The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
+        It shows how many of the events in stream A are covered by another stream B. <br>
+        The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
+        i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
+    </p>
+    <p>
+        The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
+        It shows how similar the two streams are and is useful in bandwidth division. <br>
+    </p>
+    """
+    for stream in streams:
+        html_str += f"""
+    <p>
+        The overlap matrix of the {stream.capitalize()} stream is:
+    </p>
+    """
+        with open(
+                fname_helper.intra_stream_overlap_matrix_path(
+                    stream_config, stream), "r") as overlap:
+            html_str += overlap.read()
+        html_str += f"""
+    <p>
+        The Jaccard similarity matrix of the {stream.capitalize()} stream is:
+    </p>
+    """
+        with open(
+                fname_helper.intra_stream_jaccard_similarities_path(
+                    stream_config, stream), "r") as jaccard:
+            html_str += jaccard.read()
+    return _render(html_str)
+
+
+def list_of_links_html(process, fname_helper, stream_config, building_locally):
+    base_path = fname_helper.base_html_path(building_locally)
+    links = [
+        f"""<li><a href="{base_path}/{process}__all_rates.html"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
+    ]
+    if process != "hlt1":
+        rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
+            stream_config, full_path=False)
+        links.append(
+            f"""<li><a href="{base_path}/{rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>"""
+        )
+        links.append(
+            f"""<li><a href="{base_path}/{process}__similarity_matrices.html"> Jaccard similarity and overlap matrices between streams</a></li>"""
+        )
+
+    if process == "hlt2":
+        links += [
+            f"""<li><a href="{base_path}/{process}__extra_bar_charts.html">Bar charts as below for DstData bandwidth</a></li>""",
+            f"""<li><a href="{base_path}/{process}__extra_similarity_matrices.html">Similarity and overlap matrices between WGs within each stream</a></li>""",
+        ]
+
+    if process != "hlt1":
+        links += [
+            f"""<li><a href="{base_path}/{process}__other_lines.html">List of lines in "Other" category</a></li>""",
+            f"""<li><a href="{base_path}/{fname_helper.line_descr_path(full_path=False)}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>""",
+            f"""<li><a href="{base_path}/{process}__plots_per_wg.html"> Histograms of metrics for each WG</a></li>"""
+        ]
+
+    return "\n".join(links)
+
+
+def render_sim_matrices_page(process, fname_helper, stream_config):
+    if process == "hlt1":
+        return _render("")
+
+    html_str = f"""
+    <p>
+        The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
+        It shows how many of the events in stream A are covered by another stream B. <br>
+        The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
+        i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
+    </p>
+    <p>
+        The overlap matrix of the {stream_config} streams is:
+    </p>
+    """
+    with open(fname_helper.overlap_matrix_path(stream_config), "r") as overlap:
+        html_str += overlap.read()
+    html_str += f"""
+    <p>
+        The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
+        It shows how similar the two streams are and is useful in bandwidth division. 
<br> + </p> + <p> + The Jaccard similarity matrix of the {stream_config} streams is: + </p> + """ + with open(fname_helper.jaccard_similarities_path(stream_config), + "r") as jaccard: + html_str += jaccard.read() + return _render(html_str) def _write_message(message, @@ -542,6 +594,18 @@ if __name__ == '__main__': type=str, required=True, help='Path to yaml config file defining the input.') + parser.add_argument( + '--stream-config', + type=str, + required=True, + choices=['wg', 'production', 'streamless'], + ) + parser.add_argument( + '--streams', + type=str, + nargs='+', + required=True, + help='List of trigger streams.') parser.add_argument( '-s', '--script-path', @@ -564,7 +628,7 @@ if __name__ == '__main__': '--skip-top-level-information-for-process-dependent-testpage', action='store_true', help= - 'Flag to avoid memory-consumption and build information sections of {proces}__index page.' + 'Flag to avoid memory-consumption and build information sections of {process}__index page.' ) parser.add_argument( '--building-locally', @@ -608,15 +672,9 @@ if __name__ == '__main__': n_high_rate = len( [info for info in rate_bw_info_by_line.values() if info.rate > tol]) - main_stream_config = { - "hlt1": "streamless", - "hlt2": "production", - "spruce": "wg" - }[args.process] - prod_df = pd.read_csv( fname_helper.final_rate_table_all_streams_path( - main_stream_config, ext="csv")) + args.stream_config, ext="csv")) tot_rate = sum(prod_df['Rate (kHz)']) tot_bandwidth = sum(prod_df['Total Bandwidth (GB/s)']) @@ -625,87 +683,61 @@ if __name__ == '__main__': rate_bw_info_by_line, tot_rate=tot_rate, tot_bandwidth=tot_bandwidth, + fname_helper=fname_helper, process=args.process) - other_line_table = make_other_line_table(other_line_list) - plots_per_wg = make_plots_per_wg_list(wg_list, args.process) + # Bar charts within a stream - only relevant for HLT2 + if args.process == 'hlt2': + for stream in args.streams: + intra_stream_rates_df = pd.read_csv( + fname_helper.tmp_rate_table_intra_stream_path( + args.stream_config, stream), + header=None) + intra_stream_rates_df.columns = [ + 'WG', 'Rate (kHz)', 'Bandwidth (GB/s)', + 'DstData Bandwidth (GB/s)' + ] + for column_header in intra_stream_rates_df.columns[1:]: + fname = { + 'Rate (kHz)': "rate", + 'Bandwidth (GB/s)': "bandwidth", + 'DstData Bandwidth (GB/s)': "dstbandwidth" + }[column_header] + make_bar_charts( + intra_stream_rates_df, column_header, stream, + fname_helper.bar_chart_path(args.stream_config, stream, + fname)) with open( - fname_helper.final_rate_table_all_streams_path(main_stream_config), + fname_helper.final_rate_table_all_streams_path(args.stream_config), "r") as rate_html: table_main_stream_rates = rate_html.read() - if args.process == 'hlt2': - template = HLT2_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_prod=fname_helper.stream_config_json_path( - "production", full_path=False), - stream_config_json_wg=fname_helper.stream_config_json_path( - "wg", full_path=False), - table_5stream_rates=table_main_stream_rates) - all_results = HLT2_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - line_descr=fname_helper.line_descr_path(full_path=False), - rate_table_split_by_prod_stream=fname_helper. - final_rate_table_all_lines_split_by_stream_path( - "production", full_path=False), - rate_table_split_by_wg_stream=fname_helper. 
- final_rate_table_all_lines_split_by_stream_path( - "wg", full_path=False), - PROCESS=args.process) - lines_per_wg = HLT2_OR_SPRUCING_LINES_PER_WG.render( - PROCESS=args.process) - dst_data_hist = HLT2_OR_SPRUCING_DST_DATA_HIST.render( - PROCESS=args.process) - dst_bw_hist = HLT2_OR_SPRUCING_DST_BW_HIST.render(PROCESS=args.process) - elif args.process == 'spruce': - template = SPRUCE_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_wg=fname_helper.stream_config_json_path( - "wg", full_path=False), - table_wgstream_rates=table_main_stream_rates) - all_results = SPRUCING_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - line_descr=fname_helper.line_descr_path(full_path=False), - rate_table_split_by_wg_stream=fname_helper. - final_rate_table_all_lines_split_by_stream_path( - "wg", full_path=False), - PROCESS=args.process) - lines_per_wg = HLT2_OR_SPRUCING_LINES_PER_WG.render( - PROCESS=args.process) - dst_data_hist = HLT2_OR_SPRUCING_DST_DATA_HIST.render( - PROCESS=args.process) - dst_bw_hist = HLT2_OR_SPRUCING_DST_BW_HIST.render(PROCESS=args.process) - - elif args.process == 'hlt1': - template = HLT1_REPORT_TEMPLATE.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - stream_config_json_wg=fname_helper.stream_config_json_path( - "streamless", full_path=False), - table_streamless_rates=table_main_stream_rates) - all_results = HLT1_ALL_RESULTS.render( - BASE_PATH=fname_helper.base_html_path(args.building_locally), - PROCESS=args.process) - lines_per_wg = HLT1_LINES_PER_WG.render() - dst_data_hist = HLT1_DST_DATA_HIST.render() - dst_bw_hist = HLT1_DST_BW_HIST.render() + + base_path = fname_helper.base_html_path(args.building_locally) + if args.skip_top_level_information_for_process_dependent_testpage: + top_level_header = "" + memory_consumption = "" + else: + top_level_header = render_top_level_header(args.script_path, base_path) + memory_consumption = render_memory_consumption() with open( fname_helper.html_page_outputs_path(f"{args.process}__index.html"), "w") as html_file: html = SINGLE_PROCESS_REPORT_TEMPLATE.render( - TOP_LEVEL_HEADER="" - if args.skip_top_level_information_for_process_dependent_testpage - else TOP_LEVEL_HEADER.render( - SCRIPTPATH=args.script_path, - BASE_PATH=fname_helper.base_html_path(args.building_locally)), - MEMORY_CONSUMPTION="" - if args.skip_top_level_information_for_process_dependent_testpage - else MEMORY_CONSUMPTION.render(), - TEMPLATE=template, - ALL_RESULTS=all_results, - LINES_PER_WG=lines_per_wg, - DST_DATA_HIST=dst_data_hist, - DST_BW_HIST=dst_bw_hist, + TOP_LEVEL_HEADER=top_level_header, + MEMORY_CONSUMPTION=memory_consumption, + BASE_PATH=base_path, + stream_config_json=fname_helper.stream_config_json_path( + args.stream_config, full_path=False), + main_rate_table=table_main_stream_rates, + BAR_CHARTS=render_bar_charts(args.process, args.stream_config, + args.streams), + LIST_OF_LINKS=list_of_links_html(args.process, fname_helper, + args.stream_config, + args.building_locally), + LINES_PER_WG=render_lines_pie_chart(args.process), + DST_DATA_HIST=render_dst_data_hists(args.process), INPUT_CONFIG_PATH=os.path.expandvars(args.input_config), INPUT_RATE=input_info['input_rate'], INPUT_NU=input_info['nu'], @@ -715,100 +747,51 @@ if __name__ == '__main__': PROCESS=args.process) html_file.write(html) - with open( - fname_helper.process_dependent_html_page_outputs_path( - "other_lines.html"), "w") as html_file: - html = 
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "other_lines.html"), "w") as html_file:
-        html = TABLE_OTHER_LINE_TEMPLATE.render(
-            table_other_lines=other_line_table)
-        html_file.write(html)
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "plots_per_wg.html"), "w") as html_file:
-        html = PLOTS_PER_WG_TEMPLATE.render(plots_per_wg=plots_per_wg)
-        html_file.write(html)
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "all_rates.html"), "w") as html_file:
-        html = ALL_RATE_TEMPLATE.render(
-            BASE_PATH=fname_helper.base_html_path(args.building_locally),
-            CSV_PATH=fname_helper.final_rate_table_all_lines_path(
-                "csv", full_path=False))
-        html_file.write(html)
-        with open(fname_helper.final_rate_table_all_lines_path("html"),
-                  "r") as rate_table:
-            html_file.write(rate_table.read())
-
-    stream_configs = {
-        "hlt1": ["streamless"],
-        "hlt2": ["production", "wg"],
-        "spruce": ["wg"]
-    }[args.process]
-
-    if args.process != "hlt1":
-        with open(
-                fname_helper.process_dependent_html_page_outputs_path(
-                    "similarity_matrices.html"), "w") as html_file:
-            html_file.write("""
-            <p>
-            The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
-            It shows how much events in the stream A are covered by another stream B. <br>
-            The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
-            i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
-            </p>
-            """)
-            for stream_config in stream_configs:
-                html_file.write(f"""
-                <p>
-                The overlap matrix of the {stream_config} streams is:
-                </p>
-                """)
-                with open(
-                        fname_helper.overlap_matrix_path(stream_config),
-                        "r") as overlap:
-                    html_file.write(overlap.read())
-            html_file.write("""
-            <p>
-            The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
-            It shows how similar the two streams are and is useful in bandwidth division. <br>
-            </p>
-            """)
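The two definitions in the (removed) text above carry over verbatim to the new `render_sim_matrices_page`. A tiny worked example on toy event-number sets, purely illustrative:

```python
# Toy example of the two quantities defined above. Event numbers and
# stream roles are made up.
A = {1, 2, 3, 4}  # events kept by stream A
B = {3, 4, 5}     # events kept by stream B

jaccard = len(A & B) / len(A | B)   # |A n B| / |A u B| = 2/5 = 0.40
overlap_wrt_A = len(A & B) / len(A)  # |A n B| / |A| = 2/4 = 0.50
overlap_wrt_B = len(A & B) / len(B)  # |A n B| / |B| = 2/3 ~ 0.67

print(jaccard, overlap_wrt_A, overlap_wrt_B)
```

Note the asymmetry: the overlap matrix depends on which stream is the reference (columns vs. rows), while the Jaccard index is symmetric in A and B.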
-            for stream_config in stream_configs:
-                html_file.write(f"""
-                <p>
-                The Jaccard similarity matrix of the {stream_config} streams is:
-                </p>
-                """)
-                with open(
-                        fname_helper.jaccard_similarities_path(stream_config),
-                        "r") as jaccard:
-                    html_file.write(jaccard.read())
-
-    with open(
-            fname_helper.process_dependent_html_page_outputs_path(
-                "rates_streaming.html"), "w") as html_file:
-        for stream_config in stream_configs:
-            html_file.write(f"""
-            <p>
-            The rates, event sizes and bandwidths of the {stream_config} streams are:
-            </p>
-            """)
-            with open(
-                    fname_helper.final_rate_table_all_streams_path(
-                        stream_config), "r") as rate_html:
-                html_file.write(rate_html.read())
+    # Extra pages
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "other_lines.html"),
+        render_other_line_table(args.process, other_line_list))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "plots_per_wg.html"),
+        render_plots_per_wg_page(args.process, wg_list))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "all_rates.html"),
+        render_all_lines_page(fname_helper, args.building_locally))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "similarity_matrices.html"),
+        render_sim_matrices_page(args.process, fname_helper,
+                                 args.stream_config))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "extra_bar_charts.html"),
+        render_bar_charts(
+            args.process,
+            args.stream_config,
+            args.streams,
+            metrics=['dstbandwidth']))
+
+    write_html_page(
+        fname_helper.process_dependent_html_page_outputs_path(
+            "extra_similarity_matrices.html"),
+        render_extra_sim_matrices(args.process, args.stream_config,
+                                  args.streams))
 
     with open(fname_helper.html_page_outputs_path("index.html"),
               "w") as html_file:
         if args.multiple_processes:
             html = HLT2_AND_SPRUCE_REPORT_TEMPLATE.render(
-                TOP_LEVEL_HEADER=TOP_LEVEL_HEADER.render(
-                    SCRIPTPATH=args.script_path,
-                    BASE_PATH=fname_helper.base_html_path(
-                        args.building_locally)),
-                BASE_PATH=fname_helper.base_html_path(args.building_locally),
-                MEMORY_CONSUMPTION=MEMORY_CONSUMPTION.render())
+                TOP_LEVEL_HEADER=render_top_level_header(
+                    args.script_path, base_path),
+                BASE_PATH=base_path,
+                MEMORY_CONSUMPTION=render_memory_consumption())
             html_file.write(html)
         else:
             # In single-process tests, need 'index.html' to be picked up.
diff --git a/python/PRConfig/bandwidth_helpers.py b/python/PRConfig/bandwidth_helpers.py
index 8bddb618..13b8d6ff 100644
--- a/python/PRConfig/bandwidth_helpers.py
+++ b/python/PRConfig/bandwidth_helpers.py
@@ -12,6 +12,53 @@
 import os
 
 import yaml
 
+KNOWN_WORKING_GROUPS = [
+    "B2CC",
+    "B2OC",
+    "BandQ",
+    "BnoC",
+    "Calib",
+    "Calo",
+    "Charm",
+    "DPA",
+    "HLT",
+    "IFT",
+    "Luminosity",
+    "PID",
+    "QCD",
+    "QEE",
+    "RD",
+    "RTA",
+    "SLB",
+    "Topo",
+    "Tagging",
+    "Tracking",
+    "TrackEff",
+    "HadInt",
+    "Monitoring",
+    "CutBasedDiLep",
+    "InclDetDiLep",
+]
+
+CUSTOM_WGS = {
+    "Hlt2Topo": "Topo",
+    "Hlt2CutBasedIncl": "CutBasedDiLep",
+    "Hlt2_InclDet": "InclDetDiLep"
+}
+
+
+def guess_wg(line_name, process):
+    # First, assume the line name follows the convention
+    # <Hlt2|Spruce><WG>_MyLine
+    line_prefix = line_name.split("_")[0].removeprefix(process.capitalize())
+    if line_prefix in KNOWN_WORKING_GROUPS:
+        return line_prefix
+
+    # Otherwise, fall back to a handful of known non-conforming prefixes
+    for guess, custom_wg in CUSTOM_WGS.items():
+        if line_name.startswith(guess):
+            return custom_wg
+
+    return "Other"  # If you made it here, all guesses failed
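Example behaviour of `guess_wg` as defined above (the line names are invented for illustration; note that `str.removeprefix` requires Python 3.9 or later):

```python
# All of these follow directly from the definition above; the line names
# themselves are invented.
assert guess_wg("Hlt2B2CC_BsToJpsiPhi", "hlt2") == "B2CC"  # naming convention
assert guess_wg("SpruceRD_BuToKpEE", "spruce") == "RD"     # naming convention
assert guess_wg("Hlt2Topo2Body", "hlt2") == "Topo"         # CUSTOM_WGS prefix
assert guess_wg("Hlt2UnconventionalName", "hlt2") == "Other"  # fallback
```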
 
 
 def parse_yaml(file_path):
     with open(os.path.expandvars(file_path), 'r') as f:
@@ -147,12 +194,30 @@ class FileNameHelper(object):
             self._join(self.process, stream_config, "overlap_matrix") +
             ".html")
 
+    def intra_stream_jaccard_similarities_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, stream_config, stream,
+                       "jaccard_similarity_matrix") + ".html")
+
+    def intra_stream_overlap_matrix_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, stream_config, stream, "overlap_matrix")
+            + ".html")
+
     def event_no_fname(self, stream_config, stream):
         return os.path.join(
             self.base_dir, self.output_subdir,
             self._join(self.process, "event_numbers", stream_config, stream)
             + ".json")
 
+    def intra_stream_event_no_fname(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, "event_numbers_intra_stream",
+                       stream_config, stream) + ".json")
+
     def _tmp_rate_table_path(self, stream_config, stream, line_or_stream):
         return os.path.join(
             self.base_dir, self.output_subdir, self.inter_subsubdir,
@@ -165,6 +230,12 @@ class FileNameHelper(object):
     def tmp_rate_table_per_stream_path(self, stream_config, stream):
         return self._tmp_rate_table_path(stream_config, stream, "stream")
 
+    def tmp_rate_table_intra_stream_path(self, stream_config, stream):
+        return os.path.join(
+            self.base_dir, self.output_subdir, self.inter_subsubdir,
+            self._join(self.process, f"rates_wgs_within_{stream}",
+                       stream_config, stream) + ".csv")
+
     def final_rate_table_all_lines_path(self, ext="html", full_path=True):
         fname = self._join(self.process, "rates_for_all_lines") + f".{ext}"
         return os.path.join(self.base_dir, self.output_subdir,
@@ -195,3 +266,9 @@ class FileNameHelper(object):
     def process_dependent_html_page_outputs_path(self, fname):
         return os.path.join(self.base_dir, self.output_subdir,
                             self._join(self.process, fname))
+
+    def bar_chart_path(self, main_stream_config, stream, metric):
+        return os.path.join(
+            self.base_dir, self.output_subdir,
+            self._join(self.process, f'{metric}_bar_chart', main_stream_config,
+                       stream) + ".png")
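Hypothetical usage of the new helpers; the exact on-disk layout depends on `base_dir`, `output_subdir` and `_join`, which are defined outside this patch, and the stream name here is illustrative:

```python
# Hypothetical usage; exact paths depend on FileNameHelper internals
# (base_dir, output_subdir, _join), which are defined outside this patch.
from PRConfig.bandwidth_helpers import FileNameHelper, guess_wg

fname_helper = FileNameHelper("hlt2")
print(fname_helper.tmp_rate_table_intra_stream_path("production", "turbo"))
print(fname_helper.bar_chart_path("production", "turbo", "rate"))
print(guess_wg("Hlt2Charm_DstToD0Pi", "hlt2"))  # -> "Charm"
```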
CONFIG_FILE="${TEST_PATH_PREFIX}spruce_bandwidth_input.yaml" @@ -221,11 +221,9 @@ fi # 1. Run Moore. # -d downloads the input files locally for speed-up running Moore. Not helpful unless that download is fast for you (e.g. you're at CERN) -for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do - echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_FILE}" - time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py" - STORE_ERR_CODE -done +echo "Running trigger to obtain MDF/DST files with ${STREAM_CONFIG} streams for comparison over ${CONFIG_FILE}" +time python -m MooreTests.run_bandwidth_test_jobs -d -c=$CONFIG_FILE -n=$EVTMAX -p=$PROCESS -t=$MOORE_THREADS -a=$EVENT_SIZE_UPPER_LIMIT $EXTRA_OPTS "${TEST_PATH_PREFIX}${PROCESS}_bandwidth_${STREAM_CONFIG}_streams.py" +STORE_ERR_CODE # 2. Compute line descriptives: persist reco, extra output if [ $PROCESS = "hlt1" ]; then @@ -236,39 +234,47 @@ else STORE_ERR_CODE fi -for STREAM_CONFIG in "${STREAM_CONFIGS[@]}"; do - # 3. Work out what the streams are from the config JSON; needed for later steps - STREAM_CONFIG_JSON_PATH=(`python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.stream_config_json_path('${STREAM_CONFIG}'))"`) - STREAM_STR=`(jq -r 'keys | @sh' ${STREAM_CONFIG_JSON_PATH})` - declare -a STREAMS="($STREAM_STR)" - echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}" - - # 4. Compute similarity matrices between streams by comparing event numbers - if [ $PROCESS = "hlt1" ]; then - echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"' - else - echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration" - for stream in "${STREAMS[@]}"; do - echo "Stream name: ${stream}" - time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream --file-type $OUTPUT_TYPE - STORE_ERR_CODE - done - time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py -p $PROCESS --stream-config $STREAM_CONFIG --streams ${STREAMS[@]} - STORE_ERR_CODE - fi +# 3. Work out what the streams are from the config JSON; needed for later steps +STREAM_CONFIG_JSON_PATH=(`python -c "from PRConfig.bandwidth_helpers import FileNameHelper; hlpr = FileNameHelper('${PROCESS}'); print(hlpr.stream_config_json_path('${STREAM_CONFIG}'))"`) +STREAM_STR=`(jq -r 'keys | @sh' ${STREAM_CONFIG_JSON_PATH})` +declare -a STREAMS="($STREAM_STR)" +echo "Found ${STREAM_CONFIG} streams: ${STREAMS[@]}" - # 5. Computing rates per stream as well as per line (tables split by stream) - echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration" +# 4. 
-  # 5. Computing rates per stream as well as per line (tables split by stream)
-  echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
+# 4. Compute similarity matrices between streams by comparing event numbers
+if [ $PROCESS = "hlt1" ]; then
+  echo 'Skipping similarity matrix per stream as $PROCESS = "hlt1"'
+else
+  echo "Obtaining similarity matrix for ${STREAM_CONFIG}-stream configuration"
   for stream in "${STREAMS[@]}"; do
     echo "Stream name: ${stream}"
-    time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -p $PROCESS -s $stream --stream-config $STREAM_CONFIG --file-type $OUTPUT_TYPE
+    time python $PRCONFIGROOT/python/MooreTests/list_event_numbers.py -p $PROCESS --stream-config $STREAM_CONFIG --stream $stream --file-type $OUTPUT_TYPE
     STORE_ERR_CODE
   done
+  time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py inter_stream --streams ${STREAMS[@]} -p $PROCESS --stream-config $STREAM_CONFIG
+  STORE_ERR_CODE
+fi
+
+# 5. Computing rates per stream as well as per line (tables split by stream)
+echo "Obtaining rates and bandwidth for ${STREAM_CONFIG}-stream configuration"
+for stream in "${STREAMS[@]}"; do
+  echo "Stream name: ${stream}"
+  time python $PRCONFIGROOT/python/MooreTests/line-and-stream-rates.py -c $CONFIG_FILE -p $PROCESS -s $stream --stream-config $STREAM_CONFIG --file-type $OUTPUT_TYPE
+  STORE_ERR_CODE
 done
 
+# 5b. Compute intra-stream overlap between WGs (only really envisaged for HLT2, where streams != WGs)
+if [ $PROCESS = "hlt2" ]; then
+  echo 'Computing intra-stream WG overlaps in each production stream'
+  for stream in "${STREAMS[@]}"; do
+    echo "Stream name: ${stream}"
+    time python $PRCONFIGROOT/python/MooreTests/calculate_stream_overlap.py intra_stream --stream $stream -p $PROCESS --stream-config $STREAM_CONFIG
+    STORE_ERR_CODE
+  done
+fi
+
 # 6. Combine all output into tables
 echo 'Combining all rate and bandwidth tables'
-time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS
+time python $PRCONFIGROOT/python/MooreTests/combine_rate_output.py --process $PROCESS --stream-config $STREAM_CONFIG
 STORE_ERR_CODE
 
 # 7. Required information for 'hlt2-output-locally' or 'hlt2-output-from-eos' sprucing jobs.
@@ -278,6 +284,10 @@ if [ $PROCESS = "hlt2" ] && [ $INPUTDATA = "nominal" ]; then
   STORE_ERR_CODE
 fi
 
-# 8. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
+# 8. Copy the stream config JSON from tmp/MDF -> tmp/Output so the handler can pick it up. Bit of a hack
+cp $STREAM_CONFIG_JSON_PATH tmp/Output/
+STORE_ERR_CODE
+
+# 9. Produce plots and HTML pages; add the --building-locally flag to make the links work if you are building the html pages locally
 echo 'Making plots and HTML pages'
-time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE $TOP_LEVEL_FLAG $TEST_PAGE_EXTRA_OPTS
+time python -m MooreTests.make_bandwidth_test_page -p $PROCESS -c $CONFIG_FILE -s $SCRIPT_PATH -e $ERR_CODE --streams ${STREAMS[@]} --stream-config $STREAM_CONFIG $TOP_LEVEL_FLAG $TEST_PAGE_EXTRA_OPTS
-- 
GitLab