###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import argparse
import os
import jinja2
import matplotlib.pyplot as plt
import pandas as pd
import json
from math import log10
from dataclasses import dataclass, field
from typing import List
from collections import namedtuple

from PRConfig.bandwidth_helpers import (FileNameHelper, parse_yaml, guess_wg,
KNOWN_WORKING_GROUPS,
KNOWN_STREAM_CONFIGS_BY_STAGE)
MAIN_HISTOGRAMS = ["rate", "total_size", "tot_bandwidth"]
EXTRA_HISTOGRAMS = ["dst_data_size", "dst_bandwidth"]
MAIN_BAR_CHARTS = {
"rate": 'Rate (kHz)',
"bandwidth": 'Bandwidth (GB/s)',
}
EXTRA_BAR_CHARTS = {"dstbandwidth": 'DstData Bandwidth (GB/s)'}
TDR_BANDWIDTHS = {
"hlt2": {
"production": {
"full": 5.90,
"turbo": 2.50,
"turcal": 1.60,
"total": 10.00,
},
},
"spruce": {
"full": {
"total": 0.80
},
"turbo": {
"total": 2.50
},
"turcal": {
"total": 0.2
},
"no_bias": {
"total": 0.0
},
"hlt2calib": {
"total": 0.0
},
"lumi": {
"total": 0.0
}
}
}
PRETTY_STREAM_NAMES = {
"slepton": "SL",
"sl": "SL",
"qee": "QEE",
"rd": "RD",
"bandq": "B&Q",
"b_to_open_charm": "B2OC",
"b2oc": "B2OC",
"bnoc": "BnoC",
"b_to_charmonia": "B2CC",
"b2cc": "B2CC",
"charm": "Charm",
"ift": "IFT",
"full": "Full",
"turbo": "Turbo",
"turcal": "TurCal",
"Turcal_mDST": "MDST",
"Turcal_persistreco": "PersistReco",
"Turcal_rawbanks": "RawBanks",
"Turcal_persistrecorawbanks": "PRRawBanks",
"no_bias": "NoBias",
"lumi": "Lumi",
"hlt2calib": "Calib"
}
def render_top_level_page(script_path: str,
base_path: str,
test_configs: List[tuple[str, str]],
to_disk_bar_chart=False):
html_str = f"""
<html>
<head></head>
<body>
<p>
slot.build_id: $$version$$<br>
start time: $$start_time$$<br>
end time: $$end_time$$<br>
platform: $$platform$$<br>
hostname: $$hostname$$<br>
cpu_info: $$cpu_info$$<br>
testing script path: {script_path}
</p>
<ul>
<li><a href="{base_path}/run.log">Logs</a></li>
</ul>
<p>
The bandwidth test ran the following sub-tests (process, streaming configuration): {test_configs}<br>
The webpage for each sub-test can be found below. Scroll down for a report of the test's memory consumption.
<ul>
"""
for process, stream_config in test_configs:
fname_helper = FileNameHelper(process, stream_config)
html_str += f"""
<li><a href="{base_path}/{fname_helper.index_html_page_path()}">{process.capitalize()} ({stream_config}) results</a></li>
"""
html_str += """</ul></p>"""
if to_disk_bar_chart:
html_str += f"""
<p>
Summary of bandwidth of all streams to disk (only available for those tests that run all sprucing stages):<br>
</p>
<object type="image/png" data="{FileNameHelper(process="spruce", stream_config="").to_disk_bar_chart_path(full_path=False)}"></object>
"""
html_str += """
<p>
<b> Memory consumption of this test: </b>
</p>
<object type="image/png" data="memory_consumption.png"></object>
<p>
Memory consumption as a function of wall time. <br>
The virtual memory size is the total amount of memory the process may hypothetically access. <br>
The resident set size (RSS) is the portion of the run's memory that is held in main memory (RAM). <br>
The proportional set size (PSS) is the private memory occupied by the run itself plus its proportional share of memory shared with other processes. <br>
As we only launch one test at a time, PSS should be close to RSS here, and PSS gives the real memory used by this test. <br>
Swap memory is used when RAM is full. <br>
The maximum resident set size usage is $$max_rss$$ GB. <br>
The maximum proportional set size usage is $$max_pss$$ GB. <br>
</p>
</body>
</html>"""
return _render(html_str)
def render_single_test_page(process: str, stream_config: str,
input_config_path: str, streams: List[str],
args: argparse.Namespace):
fname_helper = FileNameHelper(process, stream_config)
base_path = fname_helper.base_html_path(args.building_locally)
input_info = parse_yaml(input_config_path)
exit_code = 1 # Assume failure
with open(fname_helper.message_path(), "r") as f:
exit_code = int(json.load(f)[process][stream_config]["code"])
if exit_code == 0:
exit_code_sentence = "All sub-jobs in this test exited successfully."
else:
exit_code_sentence = "There were errors in some of the sub-jobs of this test; please see the logs."
headline_bar_chart_path = ""
if process != "hlt1":
headline_bar_chart_path = fname_helper.headline_bar_chart_path(
full_path=False)
html_str = f"""
<html>
<head></head>
<body>
<p style="color:{'green' if exit_code == 0 else 'red'}">
<b>{exit_code_sentence}</b>
</p>
<p>
This page contains the results of the {process} bandwidth test with the {stream_config} streaming configuration. Scroll down to see:
<li> Summary of main results, </li>
<li> Details of the streaming configuration, </li>
<li> Links to other html pages produced by this test, </li>
<li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
<li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
<li> Information about the input sample, </li>
<li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics. </li>
<b>Main results:</b> <br>
<object type="image/png" data="{headline_bar_chart_path}"></object>
</p>
"""
with open(fname_helper.final_rate_table_all_streams_path(),
"r") as rate_html:
html_str += rate_html.read()
total_rate, total_bw = total_rate_and_bw(fname_helper)
html_str += f"""
<p>
<b>The total bandwidth (rate) was measured to be {total_bw:.2f} GB/s ({total_rate:.2f} kHz).</b><br>
</p>
"""
stream_config_json_path = fname_helper.stream_config_json_path(
full_path=False)
html_str += f"""
<p>
The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
<a href="{base_path}/{stream_config_json_path}">here</a>. <br>
This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
"DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) is saved. <br>
The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
The total event size (and total bandwidth) counts all raw banks (incl. DstData, and detector raw banks if present) in the file. <br>
"""
if process != "hlt1":
html_str += """
<b>NB:
In real data-taking, raw banks are now left uncompressed when writing, and then the whole file is compressed afterwards.
We account for this compression by multiplying event sizes and bandwidths by a scaling factor to give accurate per-stream bandwidths.
The scaling factor is calculated for each file in the test as: '(size of the compressed file) / (size of the uncompressed file)'.
</b>
<br>
</p>
"""
else:
html_str += """</p>"""
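# A minimal sketch of the compression scaling factor described above (hypothetical
# snippet, not part of this script): for each output file the factor is simply
#
#   factor = os.path.getsize(compressed_file) / os.path.getsize(uncompressed_file)
#
# and the per-stream event sizes and bandwidths are multiplied by that factor.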
html_str += f"""
<p>
Further results can be found in the links below:
</p>
<ul>
{list_of_links_html(fname_helper, args.building_locally)}
$${fname_helper.comparison_str()}$$
</b></b>
</ul>
<p> See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
{render_bar_charts(fname_helper, streams)}
{render_lines_pie_chart(fname_helper)}
<p>
<b>Input sample information:</b>
<ul>
<li>Config file: {os.path.expandvars(input_config_path)}</li>
<li>Input rate: {input_info['input_rate']} kHz</li>
<li>Number of interactions per bunch crossing (ν): {input_info['nu']}</li>
<li>Radius of VELO opening: {input_info['velo_radial_opening']} mm</li>
</ul>
</p>
<p>
<b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
The total distributions are shown as stacked histograms, split by WG. <br>
The distributions for each WG are available in the html pages linked above. <br>
Total event size is calculated from summing all raw banks in the file (including DstData). <br>
Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank. <br>
</p>
"""
for hist_suffix in MAIN_HISTOGRAMS:
html_str += f"""
<object type="image/png" data="{fname_helper.hist_path(hist_suffix, full_path=False)}"></object>
"""
html_str += f"""
{render_dst_data_hists(fname_helper)}
</body>
</html>
"""
return _render(html_str)
@dataclass
class WGRateBWInfo:
nlines: int = 0
rate: List[float] = field(default_factory=lambda: [])
dst_size: List[float] = field(default_factory=lambda: [])
tot_size: List[float] = field(default_factory=lambda: [])
dst_bw: List[float] = field(default_factory=lambda: [])
tot_bw: List[float] = field(default_factory=lambda: [])
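# Units used in WGRateBWInfo above and LineRateBWInfo below, as filled from the rate
# tables later in this script: rate in Hz; dst_size and tot_size in kB; dst_bw and
# tot_bw in GB/s.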
LineRateBWInfo = namedtuple(
"LineRateBWInfo", ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"])
def histo_maker(entry_list,
xlabel,
plot_path,
nbins=100,
range=None,
take_log=False,
log_th=-4,  # assumed default: exponent of the log-floor threshold applied when take_log is True
stacked=False,
labels=[],
legend=False):
title = ""
if take_log:
# Values at or below 1e{log_th} are floored into the first bin
safe_log = lambda rate: log10(rate) if rate > float(f'1e{log_th}') else log_th - 1
title = f"(all values <= log10(1e{log_th}) are in the first bin)"
if stacked:
# entry_list is a list of lists
entry_list = [[safe_log(rate) for rate in lst]
for lst in entry_list]
else:
entry_list = [safe_log(rate) for rate in entry_list]
fig = plt.figure()
if range:
# If specified, range should be a 2-tuple of floats (low, high)
plt.hist(entry_list, nbins, range=range, stacked=stacked, label=labels)
else:
plt.hist(entry_list, nbins, stacked=stacked, label=labels)
plt.xlabel(xlabel)
plt.ylabel("Number of lines")
if title: plt.title(title)
if legend: plt.legend(loc='upper right')
plt.yscale('log', nonpositive='clip')
plt.savefig(plot_path, format="png")
plt.close(fig)
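# Illustrative usage of histo_maker (values and the output path are made up, not taken
# from any test output): plot a stacked, log10-binned rate histogram for two WGs, with
# every rate at or below 1e-1 Hz collected into the first bin.
#
#   histo_maker([[12000.0, 350.0], [0.0, 7.5]],
#               "Log10(Rate [Hz])",
#               "example_rate_hist.png",
#               take_log=True,
#               log_th=-1,
#               stacked=True,
#               legend=True,
#               labels=["BandQ", "Charm"])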
def list_of_other_lines(process, all_lines_bw_info):
return [
line for line in all_lines_bw_info.keys()
if guess_wg(line, process) == "Other"
]
def make_plots(all_lines_bw_info: dict[str, LineRateBWInfo],
fname_helper: FileNameHelper):
'''
Make plots of rate, bandwidth and event sizes of all lines.
It will create 5 stacked histograms containing distributions of all lines
grouped by WG, and a pie chart showing the number of lines per WG.
'''
# Count number of lines and rates/evt sizes per WG
rate_info_per_wg = {
wg: WGRateBWInfo()
for wg in KNOWN_WORKING_GROUPS + ["Other"]
}
for line, bw_info in all_lines_bw_info.items():
wg_guess = guess_wg(line, fname_helper.process)
rate_info_per_wg[wg_guess].nlines += 1
for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
getattr(rate_info_per_wg[wg_guess], attrib).append(
getattr(bw_info, attrib))
rate_info_per_wg = {
k: info
for k, info in rate_info_per_wg.items() if info.nlines != 0
}
# Make a pie chart of lines per WG
labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
pie = plt.pie([v.nlines for v in rate_info_per_wg.values()],
radius=1,
wedgeprops=dict(width=0.4, edgecolor="w"))
plt.legend(
pie[0],
labels,
loc='center',
bbox_to_anchor=(1, 0.5),
bbox_transform=plt.gcf().transFigure)
plt.title(f"Number of {fname_helper.process.capitalize()} lines per WG")
plt.savefig(
fname_helper.pie_chart_path(full_path=True),
format="png",
bbox_inches='tight')
# Stacked histograms
for attrib, xtitle, plot_bit, log_th, range in zip(
["rate", "tot_size", "tot_bw", "dst_size", "dst_bw"], [
"Log10(Rate [Hz])", "Total Event Size [kB]",
"Log10(Bandwidth from Total Event Size [GB/s])",
"DstData RawBank Size [kB]",
"Log10(Bandwidth from DstData Size [GB/s])"
], MAIN_HISTOGRAMS + EXTRA_HISTOGRAMS, [-1, 0, -4, 0, -4],
[(-2, 7),
(0, 500 if fname_helper.process == 'hlt2' else 1000), (-5, 2),
(0, 500 if fname_helper.process == 'hlt2' else 1000), (-5, 2)]):
histo_maker(
[getattr(info, attrib) for info in rate_info_per_wg.values()],
xtitle,
fname_helper.hist_path(plot_bit, full_path=True),
range=range,
take_log="Log10" in xtitle,
log_th=log_th,
stacked=True,
legend=True,
labels=list(rate_info_per_wg.keys()))
return
def _important_bar_chart_maker(bandwidths: dict[str, dict[str, float]],
process: str,
stream_config="",
is_to_total_to_disk_bar_chart=True):
fname_helper = FileNameHelper(process, stream_config)
colors = {'Current': 'tab:orange', 'TDR': 'tab:grey'}
width = 0.4
fig, ax = plt.subplots()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
for i_col, (label, bandwidths_by_stream) in enumerate(bandwidths.items()):
offset = width * i_col
bars = ax.bar([x + offset for x in range(len(bandwidths_by_stream))],
bandwidths_by_stream.values(),
width=width,
label=label,
zorder=3,
color=colors[label])
if process == "spruce" and not is_to_total_to_disk_bar_chart:
# Only label the last bar - don't have per-WG expectations
ax.bar_label(
bars,
labels=[''] * (len(bars) - 1) +
[round(bandwidths_by_stream['Total'], 2)])
else:
ax.bar_label(
bars,
labels=[
round(val, 2) for val in bandwidths_by_stream.values()
])
ax.set_ylabel('Bandwidth (GB/s)')
if is_to_total_to_disk_bar_chart:
title = "Sprucing (output to disk)"
else:
title = {
"hlt2": "Hlt2 (output to tape)",
"spruce": f"Sprucing of {stream_config} stream to disk"
}[process]
ax.set_title(title)
# Ticks need special handling for (spruce and not the to-disk chart), as those charts only have 1 meaningful TDR bar (the total)
tick_pos_opt = 'weird' if process == 'spruce' and not is_to_total_to_disk_bar_chart else 'default'
tick_positions = {
'default': [x + width / 2 for x in range(len(bandwidths_by_stream))],
'weird': [x for x in range(len(bandwidths_by_stream) - 1)] +
[len(bandwidths_by_stream) - 1 + width / 2]
}[tick_pos_opt]
ax.set_xticks(tick_positions, bandwidths_by_stream.keys())
ax.legend(loc='upper center', ncols=2)
plot_path = fname_helper.to_disk_bar_chart_path(
full_path=True
) if is_to_total_to_disk_bar_chart else fname_helper.headline_bar_chart_path(
full_path=True)
plt.savefig(plot_path, format="png")
plt.close(fig)
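# For reference: with width = 0.4 and, say, three categories, the "Current" bars sit at
# x = 0, 1, 2 and the "TDR" bars at x = 0.4, 1.4, 2.4, so the 'default' tick positions
# (x + width / 2) fall midway between each pair at 0.2, 1.2, 2.2. In the 'weird' case
# (per-stream sprucing charts) only the final "Total" category has a meaningful TDR bar,
# so all ticks except the last sit under the "Current" bars and only the last tick is
# shifted by width / 2.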

def headline_bar_charts(fname_helper: FileNameHelper):
"""Headline bar chart of rate/bandwidth per stream c.f. TDR"""
process = fname_helper.process
stream_config = fname_helper.stream_config
rates_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(ext='csv'))
bandwidths = {
"Current":
dict(zip(rates_df['Stream'], rates_df['Total Bandwidth (GB/s)'])),
"TDR": {
stream: TDR_BANDWIDTHS[process][stream_config].get(stream, 0)
for stream in rates_df['Stream'].to_list()
}
}
for series in ["Current", "TDR"]:
bandwidths[series] = {
PRETTY_STREAM_NAMES.get(stream, stream): val
for stream, val in bandwidths[series].items()
}
bandwidths['Current']['Total'] = sum(bandwidths['Current'].values())
bandwidths['TDR']['Total'] = TDR_BANDWIDTHS[process][stream_config][
'total']
_important_bar_chart_maker(
bandwidths,
process,
stream_config,
is_to_total_to_disk_bar_chart=False)
def _make_bar_chart(rates_df, column, stream, plot_path):
"""Bar charts of the WG-by-WG rates within 1 stream"""
fig = plt.figure()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
plt.bar_label(bars, labels=[round(val, 2) for val in rates_df[column]])
plt.ylabel(column)
plt.xticks(rates_df['WG'], rates_df['WG'], rotation='vertical')
plt.subplots_adjust(bottom=0.25)
plt.title(f'{column} for each WG in the {stream.capitalize()} stream')
plt.savefig(plot_path, format="png")
plt.close(fig)

def make_per_wg_bar_charts(fname_helper: FileNameHelper, streams: list[str]):
all_bar_charts = {**MAIN_BAR_CHARTS, **EXTRA_BAR_CHARTS}
for stream in streams:
print(f"Making per-WG bar charts for {stream}")
try:
intra_stream_rates_df = pd.read_csv(
fname_helper.tmp_rate_table_intra_stream_path(stream),
header=None)
# NOTE beware if the ordering of the columns ever changes in line-and-stream-rates.py
intra_stream_rates_df.columns = ['WG'] + list(
all_bar_charts.values())
for metric, column in all_bar_charts.items():
_make_bar_chart(
intra_stream_rates_df, column, stream,
fname_helper.bar_chart_path(
stream, metric, full_path=True))
except pd.errors.EmptyDataError:
print(f"Per-WG bar charts: skipping {stream} as no rates found")
return
def write_html_page(page_path, rendered_html):
if rendered_html:
with open(page_path, "w") as html_file:
html_file.write(rendered_html)
def _render(html_str):
return jinja2.Template(html_str).render()

def render_all_lines_page(fname_helper, building_locally):
csv_path = fname_helper.final_rate_table_all_lines_path(
"csv", full_path=False)
html_str = f"""
<p>
Rates, event sizes and bandwidths of all lines, listed in descending order of bandwidth. <br>
Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br>
Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br>
These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a>
</p>
"""
with open(fname_helper.final_rate_table_all_lines_path("html"),
"r") as rate_table:
html_str += rate_table.read()
return _render(html_str)
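# Small worked example of the exclusive/inclusive distinction described in
# render_all_lines_page above (numbers are illustrative): if line X fires in 100 events
# and in 40 of those no other line fires, the exclusive rate counts only those 40
# events, while the (inclusive) bandwidth sums the raw bank sizes of all 100 events in
# which X fired.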
def render_other_line_table(process, lines):
if process == "hlt1":
return _render("")
html_str = """
<p>
List of line names that were categorized as "Other".
</p>
"""
html_str += r'''<table border = "1">
<tr>
<th> Name </th>
</tr>'''
for line in lines:
html_str += f'''
<tr>
<td> {line} </td>
</tr>'''
html_str += '\n</table>'
return _render(html_str)

def render_dst_data_hists(fname_helper: FileNameHelper):
if fname_helper.process == "hlt1":
return _render("")
html_str = ''
for hist_suffix in EXTRA_HISTOGRAMS:
html_str += f"""
<object type="image/png" data="{fname_helper.hist_path(hist_suffix, full_path=False)}"></object>
"""
return _render(html_str)

def render_lines_pie_chart(fname_helper: FileNameHelper):
if fname_helper.process == "hlt1":
return _render("")
return _render(f"""
<p>
<b>The number of selection lines per working group:</b> <br>
</p>
<object type="image/png" data="{fname_helper.pie_chart_path(full_path=False)}"></object>
<p>
The "Other" category contains those lines whose parsed name does not belong to any known WG. <br>
For lines to be categorized correctly, follow the naming convention:
line names should start with `Hlt2/Spruce[WG]_`.
</p>
""")
def render_bar_charts(fname_helper: FileNameHelper,
streams: list[str],
metrics=MAIN_BAR_CHARTS.keys()):
if fname_helper.process != "hlt2":
return _render("")
html_str = ''
for metric in metrics:
html_str += f'''
<p>
<b>{metric.capitalize()} within each stream:</b>
</p>
<p>
"TotalInclusive" is the physical {metric} of the stream.<br>
Each WG-specific {metric} bar is calculated by iterating through the events in the streamed HLT2 output file, and counting {metric} due to that event if one or more lines from that WG fired.<br>
These WG-specific {metric}s are therefore not physical (no per-WG streams exist at HLT2) but roughly indicate the proportion of the stream's {metric} due to each WG.<br>
"SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive" - the former will be larger than the latter if there is non-negligible WG-by-WG overlap.<br>
</p>
'''
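# Small numerical example of the "SumWGs" vs "TotalInclusive" distinction explained in
# the text above (numbers are illustrative): if 0.6 GB/s of a 1.0 GB/s stream comes from
# events in which a B2OC line fired and 0.5 GB/s from events in which a Charm line
# fired, with 0.1 GB/s of events firing lines from both WGs, then the per-WG bars show
# 0.6 and 0.5, SumWGs = 1.1 GB/s, but TotalInclusive = 1.0 GB/s.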
if metric == "bandwidth":
html_str += '''
<p>
In the Turbo stream, the WG categorisation corresponds almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
However, the "SumWGs" bandwidth here will not equal the sum of the sprucing passthrough streams, as the file formats and compression settings are different between HLT2 and sprucing.<br>
Nevertheless, significant WG-by-WG overlap in Turbo here is an indicator that the sprucing passthrough streams will be significantly inflated by overlap with other WGs.<br>
(If an event fires Turbo lines from 2 WGs at HLT2, both candidates are streamed to both WG streams by the passthrough sprucing, inflating the bandwidth of both streams).<br>
</p>
'''
for stream in streams:
html_str += f'''
<object type="image/png" data="{fname_helper.bar_chart_path(stream, metric, full_path=False)}"></object>
'''
return _render(html_str)
SIM_MATRICES_DESCR = """
<p>
The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
It shows how much of stream A's events are also covered by stream B. <br>
The columns in the overlap matrices are the target streams (A) and the rows are the comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
</p>
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
"""

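# Illustrative sketch (not used by this script) of the two matrix entries described in
# SIM_MATRICES_DESCR above, assuming each stream is represented by the set of its
# selected event numbers:
#
#   def overlap_wrt_a(a: set, b: set) -> float:
#       return len(a & b) / len(a) if a else 0.0             # |A n B| / |A|
#
#   def jaccard(a: set, b: set) -> float:
#       return len(a & b) / len(a | b) if (a | b) else 0.0   # |A n B| / |A u B|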
def render_sim_matrices_page(fname_helper: FileNameHelper):
if fname_helper.process == "hlt1":
return _render("")
html_str = SIM_MATRICES_DESCR
html_str += f"""
<p>
The overlap matrix of the {fname_helper.stream_config} streams is:
</p>
"""
with open(fname_helper.overlap_matrix_path(), "r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard similarity matrix of the {fname_helper.stream_config} streams is:
</p>
"""
with open(fname_helper.jaccard_similarities_path(), "r") as jaccard:
html_str += jaccard.read()
return _render(html_str)

def render_extra_sim_matrices(fname_helper: FileNameHelper,
streams: list[str]):
if fname_helper.process != "hlt2":
return _render("")
html_str = SIM_MATRICES_DESCR
for stream in streams:
html_str += f"""
<p>
The overlap matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(fname_helper.intra_stream_overlap_matrix_path(stream),
"r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard similarity matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(
fname_helper.intra_stream_jaccard_similarities_path(stream),
"r") as jaccard:
html_str += jaccard.read()
return _render(html_str)

def list_of_links_html(fname_helper: FileNameHelper, building_locally: bool):
base_path = fname_helper.base_html_path(building_locally)
_all_rates_path = fname_helper.all_rates_html_page_path(full_path=False)
links = [
f"""<li><a href="{base_path}/{_all_rates_path}"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
]
if fname_helper.process != "hlt1":
_rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
full_path=False)
_sim_matrices_path = fname_helper.sim_matrices_html_page_path(
full_path=False)
links += [
f"""<li><a href="{base_path}/{_rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>""",
f"""<li><a href="{base_path}/{_sim_matrices_path}"> Jaccard similarity and overlap matrices between streams</a></li>"""
]
if fname_helper.process == "hlt2":
_rate_table_by_stream_by_wg = fname_helper.final_rate_table_all_lines_split_by_stream_by_wg_path(
full_path=False)
_extra_bar_charts_path = fname_helper.extra_bar_charts_html_page_path(
full_path=False)
_extra_sim_matrices_path = fname_helper.extra_sim_matrices_html_page_path(
full_path=False)
links += [
f"""<li><a href="{base_path}/{_rate_table_by_stream_by_wg}"> Rate/bandwidth tables for each stream, split also by WG, with 1 row per trigger line</a></li>""",
f"""<li><a href="{base_path}/{_extra_bar_charts_path}">Bar charts as below for DstData bandwidth</a></li>""",
f"""<li><a href="{base_path}/{_extra_sim_matrices_path}">Similarity and overlap matrices between WGs within each stream</a></li>""",
]
if fname_helper.process != "hlt1":
_other_lines_path = fname_helper.other_lines_html_page_path(
full_path=False)
_line_descr_path = fname_helper.line_descr_path(full_path=False)
links += [
f"""<li><a href="{base_path}/{_other_lines_path}">List of lines in "Other" category</a></li>""",
f"""<li><a href="{base_path}/{_line_descr_path}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>"""
]
return "\n".join(links)

def total_rate_and_bw(fname_helper: FileNameHelper):
streams_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(ext="csv"))
return sum(streams_df['Rate (kHz)']), sum(
streams_df['Total Bandwidth (GB/s)'])

def write_message(fname_helper: FileNameHelper,
bw_info_by_line: dict[str, LineRateBWInfo]):
"""Append to message.json for the BandwidthTestHandler to send info to Gitlab and Mattermost"""
high = {
'hlt1': 1e6,
'hlt2': 1000,
'spruce': 500
}[fname_helper.process] # Hz
n_low_rate = len(
[info for info in bw_info_by_line.values() if info.rate == 0])
n_high_rate = len(
[info for info in bw_info_by_line.values() if info.rate > high])
tot_rate, tot_bandwidth = total_rate_and_bw(fname_helper)
# load up message.json
with open(fname_helper.message_path(), "r") as message:
info = json.load(message)
for k, v in {
"total_rate": tot_rate,
"total_bandwidth": tot_bandwidth,
"n_low_rate": n_low_rate,
"n_high_rate": n_high_rate
}.items():
info[fname_helper.process][fname_helper.stream_config][k] = v
with open(fname_helper.message_path(), "w") as f:
json.dump(info, f, indent=4)
return 0
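# After this update, message.json contains entries of the form (schematic; numbers are
# illustrative):
#
#   {
#     "hlt2": {
#       "production": {
#         "code": 0,
#         "total_rate": 124.1,
#         "total_bandwidth": 9.87,
#         "n_low_rate": 2,
#         "n_high_rate": 1
#       }
#     }
#   }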
def make_html_for_single_test(process: str, stream_config: str,
input_config_path: str,
args: argparse.Namespace):
fname_helper = FileNameHelper(process, stream_config)
with open(fname_helper.stream_config_json_path(),
"r") as stream_config_json:
streams = list(json.load(stream_config_json).keys())
### Make plots & tables
# Headline bar charts
if stream_config != "streamless":
headline_bar_charts(fname_helper)
if process == 'hlt2':
make_per_wg_bar_charts(fname_helper, streams)
df = pd.read_csv(
fname_helper.final_rate_table_all_lines_path("csv"), sep=',')
kHz_to_Hz = 1000
rate_bw_info_by_line = {
df['Line'][i]: LineRateBWInfo(
df['Rate (kHz)'][i] * kHz_to_Hz, df["Avg DstData Size (kB)"][i],
df["Avg Total Event Size (kB)"][i],
df["DstData Bandwidth (GB/s)"][i], df["Total Bandwidth (GB/s)"][i])
for i in range(len(df))
}
make_plots(rate_bw_info_by_line, fname_helper=fname_helper)
# Extra pages
write_html_page(
fname_helper.other_lines_html_page_path(full_path=True),
render_other_line_table(
process, list_of_other_lines(process, rate_bw_info_by_line)))
write_html_page(
fname_helper.all_rates_html_page_path(full_path=True),
render_all_lines_page(fname_helper, args.building_locally))
write_html_page(
fname_helper.sim_matrices_html_page_path(full_path=True),
render_sim_matrices_page(fname_helper))
write_html_page(
fname_helper.extra_bar_charts_html_page_path(full_path=True),
render_bar_charts(
fname_helper, streams, metrics=EXTRA_BAR_CHARTS.keys()))
write_html_page(
fname_helper.extra_sim_matrices_html_page_path(full_path=True),
render_extra_sim_matrices(fname_helper, streams))
# Main page
with open(fname_helper.index_html_page_path(full_path=True),
"w") as html_file:
html = render_single_test_page(process, stream_config,
input_config_path, streams, args)
html_file.write(html)
# Prepare messages to GitLab
write_message(fname_helper, rate_bw_info_by_line)
return
def total_bw_to_disk_bar_chart(stream_configs: list[str]):
disk_process = "spruce"
DISK_TDR_BANDWIDTHS = TDR_BANDWIDTHS[disk_process]
bandwidths = {
"Current": {
PRETTY_STREAM_NAMES.get(stream_config, stream_config):
total_rate_and_bw(FileNameHelper(disk_process, stream_config))[1]
for stream_config in stream_configs
},
"TDR": {
PRETTY_STREAM_NAMES.get(stream_config, stream_config):
DISK_TDR_BANDWIDTHS[stream_config]['total']
for stream_config in stream_configs
}
}
bandwidths['Current']['Total'] = sum(bandwidths['Current'].values())
bandwidths['TDR']['Total'] = sum(bandwidths['TDR'].values())
print("Summary of bandwidths to disk:")
print(bandwidths)
_important_bar_chart_maker(
bandwidths, disk_process, is_to_total_to_disk_bar_chart=True)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='make_bandwidth_test_page')
parser.add_argument(
'--per-test-info',
type=str,
nargs='+',
required=True,
help=
"List of strings, each being a colon-separated list corresponding to <process>:<stream_config>:<input_config_yaml_path>"
)
parser.add_argument(
'-s',
'--script-path',
type=str,
required=True,
help=
'Path to the top-level testing script that is running/calling this script.'
)
parser.add_argument(
'--building-locally',
action='store_true',
help=
'Makes links between pages work for building the pages locally rather than on the LHCbPR website.'
)
args = parser.parse_args()
processes_and_stream_configs = []
# Unpack args.per_test_info into process, stream_config, input_config
for per_test_info in args.per_test_info:
assert len(
per_test_info.split(':')
) == 3, "per_test_info must be colon-separated list of <process>:<stream_config>:<input_config_yaml_path>"
process, stream_config, input_config = per_test_info.split(':')
assert process in ['hlt1', 'hlt2', 'spruce'
], "process must be one of 'hlt1', 'hlt2', 'spruce'"
make_html_for_single_test(process, stream_config, input_config, args)
processes_and_stream_configs.append((process, stream_config))
# Bar chart of total bandwidth to disk
expected_stream_configs_to_disk = KNOWN_STREAM_CONFIGS_BY_STAGE["spruce"]
to_disk_stream_configs = [
stream_config
for process, stream_config in processes_and_stream_configs
if process == 'spruce'
]
make_total_bw_to_disk_bar_chart = sorted(
expected_stream_configs_to_disk) == sorted(to_disk_stream_configs)
if make_total_bw_to_disk_bar_chart:
total_bw_to_disk_bar_chart(to_disk_stream_configs)
# Top-level page
base_path = FileNameHelper.base_html_path(args.building_locally)
with open(FileNameHelper.top_level_index_html_path(), "w") as html_file:
html = render_top_level_page(
args.script_path,
base_path,
processes_and_stream_configs,
to_disk_bar_chart=make_total_bw_to_disk_bar_chart)
html_file.write(html)