###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import argparse
import json
import os
from collections import namedtuple
from dataclasses import dataclass, field
from math import log10
from typing import List

import jinja2
import matplotlib.pyplot as plt
import pandas as pd

from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS, KNOWN_STREAM_CONFIGS_BY_STAGE
# File-name suffixes of the stacked histograms produced for every process
# (used with FileNameHelper.hist_path; see make_plots and render_single_test_page).
MAIN_HISTOGRAMS = ["rate", "total_size", "tot_bandwidth"]
# Extra histograms based on the DstData raw bank only; skipped for HLT1
# (see render_dst_data_hists).
EXTRA_HISTOGRAMS = ["dst_data_size", "dst_bandwidth"]
# Per-WG bar charts drawn for each stream: metric suffix -> column/axis title
# (column titles must match the intra-stream rate table; see make_per_wg_bar_charts).
MAIN_BAR_CHARTS = {
    "rate": 'Rate (kHz)',
    "bandwidth": 'Bandwidth (GB/s)',
}
# DstData-only bar chart, rendered on the HLT2 "extra" pages only.
EXTRA_BAR_CHARTS = {"dstbandwidth": 'DstData Bandwidth (GB/s)'}
# Reference bandwidths (GB/s) per process and streaming configuration,
# drawn as the grey "TDR" comparison bars in the headline/to-disk bar charts
# (see headline_bar_charts and total_bw_to_disk_bar_chart).
TDR_BANDWIDTHS = {
    "hlt2": {
        "production": {
            "full": 5.90,
            "turbo": 2.50,
            "turcal": 1.60,
            "total": 10.00,
        },
    },
    "spruce": {
        # Sprucing configs only carry a 'total' expectation (no per-WG numbers);
        # _important_bar_chart_maker relies on this when labelling bars.
        "wg": {
            "total": 0.80
        },
        "wgpass": {
            "total": 2.50
        },
        "turcal": {
            "total": 0.2
        },
        "no_bias": {
            "total": 0.0
        },
        "hlt2calib": {
            "total": 0.0
        },
        "lumi": {
            "total": 0.0
        }
    }
}
# Map from internal stream identifiers (as they appear in the rate tables)
# to the display names used on plots; unknown streams fall back to their raw
# name via PRETTY_STREAM_NAMES.get(stream, stream).
PRETTY_STREAM_NAMES = {
    "slepton": "SL",
    "sl": "SL",
    "qee": "QEE",
    "rd": "RD",
    "bandq": "B&Q",
    "b_to_open_charm": "B2OC",
    "b2oc": "B2OC",
    "bnoc": "BnoC",
    "b_to_charmonia": "B2CC",
    "b2cc": "B2CC",
    "charm": "Charm",
    "ift": "IFT",
    "full": "Full",
    "turbo": "Turbo",
    "turcal": "TurCal",
    "Turcal_mDST": "MDST",
    "Turcal_persistreco": "PersistReco",
    "Turcal_rawbanks": "RawBanks",
    "Turcal_persistrecorawbanks": "PRRawBanks",
    "no_bias": "NoBias",
    "lumi": "Lumi",
    "hlt2calib": "Calib"
}
# Map from a sprucing stream-config name to the HLT2 output stream it sprucees,
# used to label sprucing results (e.g. "spruce wg" processes the "Full" stream).
# BUGFIX: stray line-number artifacts inside this dict literal (a syntax error)
# have been removed; the key/value pairs themselves are unchanged.
MAP_STREAMS = {
    "streamless": "streamless",
    "production": "production",
    "wg": "Full",
    "wgpass": "Turbo",
    "turcal": "TurCal",
    "no_bias": "NoBias",
    "lumi": "Lumi",
    "hlt2calib": "Calib"
}  # TODO obviated if spruce stream configs actually had these names
def render_top_level_page(script_path: str,
                          base_path: str,
                          test_configs: List[tuple[str, str]],
                          to_disk_bar_chart=False):
    """Render the top-level index page linking to every sub-test's results page.

    Args:
        script_path: path of the top-level testing script (shown for provenance).
        base_path: base URL/path under which all per-test pages live.
        test_configs: list of (process, stream_config) tuples that were run.
        to_disk_bar_chart: if True, embed the summary to-disk bandwidth bar chart.

    Returns the rendered HTML string.
    """
    html_str = f"""
    <html>
    <head></head>
    <body>
    <p>
        slot.build_id: $$version$$<br>
        start time: $$start_time$$<br>
        end time: $$end_time$$<br>
        platform: $$platform$$<br>
        hostname: $$hostname$$<br>
        cpu_info: $$cpu_info$$<br>
        testing script path: {script_path}
    </p>
    <ul>
        <li><a href="{base_path}/run.log">Logs</a></li>
    </ul>
    <p>
        The bandwidth test ran the following sub-tests (process, streaming configuration): {test_configs}<br>
        The appropriate webpages can be found below for each test below. Scroll down for a report of the test's memory consumption.
        <ul>
    """
    for process, stream_config in test_configs:
        fname_helper = FileNameHelper(process)
        # Sprucing sub-tests are labelled with the HLT2 stream they spruce.
        extra_sfx = f" (of {MAP_STREAMS[stream_config]}-stream output)" if process == "spruce" else ""
        html_str += f"""
            <li><a href="{base_path}/{fname_helper.index_html_page_path(stream_config)}">{process.capitalize()} {stream_config}{extra_sfx} results</a></li>
        """
    html_str += """</ul></p>"""
    if to_disk_bar_chart:
        # BUGFIX: previously this relied on the loop variable `fname_helper`
        # leaking out of the loop above (undefined when test_configs is empty,
        # and process-dependent otherwise). The to-disk bar chart is produced
        # with the spruce helper (see total_bw_to_disk_bar_chart), so use that
        # helper explicitly here.
        disk_helper = FileNameHelper("spruce")
        html_str += f"""
        <p>
        Summary of bandwidth of all streams to disk (only available for those tests that run all sprucing stages):<br>
        </p>
        <object type="image/png" data="{disk_helper.to_disk_bar_chart_path(full_path=False)}"></object>
        """
    html_str += """
    <p>
    <b> Memory consumption of this test: </b>
    </p>
    <object type="image/png" data="memory_consumption.png"></object>
    <p>
        Memory consumption as functions of Wall-time. <br>
        The virtual memory size is the total amount of memory the process may hypothetically access. <br>
        The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
        The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
        As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
        Swap memory is used when RAM is full. <br>
        The maximum resident set size usage is $$max_rss$$ GB. <br>
        The maximum proportional set size usage is $$max_pss$$ GB. <br>
    </p>
    </body>
    </html>"""
    return _render(html_str)
def render_single_test_page(process: str, stream_config: str,
                            input_config_path: str, streams: List[str],
                            args: argparse.Namespace):
    """Render the main HTML results page for one (process, stream_config) sub-test.

    Reads the sub-test exit code from message.json, then assembles: the headline
    bar chart (non-HLT1), the per-stream rate table, streaming-config details,
    links to auxiliary pages, bar/pie charts, input-sample info and the stacked
    rate/bandwidth histograms. Returns the rendered HTML string.
    """
    fname_helper = FileNameHelper(process)
    base_path = fname_helper.base_html_path(args.building_locally)
    input_info = parse_yaml(input_config_path)
    exit_code = 1  # Assume failure
    with open(fname_helper.message_path(), "r") as f:
        exit_code = int(json.load(f)[process][stream_config]["code"])
    if exit_code == 0:
        exit_code_sentence = "All sub-jobs in this test exited successfully."
    else:
        exit_code_sentence = "There were errors in some of the sub-jobs of this test; please see the logs."
    headline_bar_chart_path = ""
    if process != "hlt1":
        headline_bar_chart_path = fname_helper.headline_bar_chart_path(
            stream_config, full_path=False)
    # NOTE(review): a stretch of this page's introductory HTML was lost to file
    # corruption (replaced by line-number artifacts); the bullet list below is
    # closed off minimally — confirm against upstream history if exact wording
    # matters.
    html_str = f"""
    <html>
    <head></head>
    <body>
    <p style="color:{'green' if exit_code == 0 else 'red'}">
        <b>{exit_code_sentence}</b>
    </p>
    <p>
        This page contains the results of the {process} bandwidth test with the {stream_config} streaming configuration. Scroll down to see:
        <li> Summary of main results, </li>
        <li> Details of the streaming configuration, </li>
        <li> Links to other html pages produced by this test, </li>
        <li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
        <li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
        <li> Information about the input sample, </li>
        <li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics. </li>
    </p>
    <p>
        <b>Main results:</b> <br>
        <object type="image/png" data="{headline_bar_chart_path}"></object>
    </p>
    """
    with open(
            fname_helper.final_rate_table_all_streams_path(stream_config),
            "r") as rate_html:
        html_str += rate_html.read()
    total_rate, total_bw = total_rate_and_bw(fname_helper, stream_config)
    html_str += f"""
    <p>
        <b>The total bandwidth (rate) was measured to be {total_bw:.2f} GB/s ({total_rate:.2f} kHz).</b><br>
    </p>
    """
    stream_config_json_path = fname_helper.stream_config_json_path(
        stream_config, full_path=False)
    html_str += f"""
    <p>
        The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
        <a href="{base_path}/{stream_config_json_path}">here</a>. <br>
        This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
        "DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) are saved. <br>
        The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
        The total event size (and total bandwidth) count all raw banks (incl. DstData, and detector raw banks if present) in the file. <br>
    """
    if process != "hlt1":
        html_str += """
        <b>NB:
        In real data-taking, raw banks are now left uncompressed when writing, and then the whole file is compressed afterwards.
        We account for this compression by multiplying event sizes and bandwidths by a scaling factor to give accurate per-stream bandwidths.
        The scaling factor is calculated for each file in the test as: '(size of the compressed file) / (size of the uncompressed file)'.
        </b>
        <br>
        </p>
        """
    else:
        html_str += """</p>"""
    html_str += f"""
    <p>
        Further results can be found in the links below:
    </p>
    <ul>
        {list_of_links_html(fname_helper, stream_config, args.building_locally)}
        $${fname_helper.comparison_str(stream_config)}$$
        </b></b>
    </ul>
    <p> See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
    {render_bar_charts(fname_helper, stream_config, streams)}
    {render_lines_pie_chart(fname_helper, stream_config)}
    <p>
        <b>Input sample information:</b>
        <ul>
        <li>Config file: {os.path.expandvars(input_config_path)}</li>
        <li>Input rate: {input_info['input_rate']} kHz</li>
        <li>Number of interactions per bunch crossing (ν): {input_info['nu']}</li>
        <li>Radius of VELO opening: {input_info['velo_radial_opening']} mm</li>
        </ul>
    </p>
    <p>
        <b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
        The total distributions are shown as a stacked histogram, split into several histograms of WGs. <br>
        The distributions per WG is attached in the html page linked above. <br>
        Total event size is calculated from summing all raw banks in the file (including DstData). <br>
        Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank. <br>
    </p>
    """
    for hist_suffix in MAIN_HISTOGRAMS:
        html_str += f"""
        <object type="image/png" data="{fname_helper.hist_path(stream_config, hist_suffix, full_path=False)}"></object>
        """
    html_str += f"""
    {render_dst_data_hists(fname_helper, stream_config)}
    </body>
    </html>
    """
    return _render(html_str)
@dataclass
class WGRateBWInfo:
    """Accumulated per-WG counters: number of lines plus lists of each line's
    rate, event sizes and bandwidths (filled in make_plots)."""
    nlines: int = 0
    # default_factory=list replaces the needless `lambda: []` wrappers;
    # each instance still gets its own independent list.
    rate: List[float] = field(default_factory=list)
    dst_size: List[float] = field(default_factory=list)
    tot_size: List[float] = field(default_factory=list)
    dst_bw: List[float] = field(default_factory=list)
    tot_bw: List[float] = field(default_factory=list)
# Per-line record: rate (Hz once scaled in make_html_for_single_test),
# average DstData / total event sizes (kB) and the two bandwidths (GB/s).
LineRateBWInfo = namedtuple(
    "LineRateBWInfo", ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"])
def histo_maker(entry_list,
                xlabel,
                plot_path,
                nbins=100,
                range=None,
                take_log=False,
                log_th=-4,
                stacked=False,
                labels=None,
                legend=False):
    """Draw a (possibly stacked) 1D histogram of entry_list and save it as PNG.

    Args:
        entry_list: list of values, or (if stacked) list of lists of values.
        xlabel: x-axis label.
        plot_path: output PNG path.
        nbins: number of bins.
        range: optional (low, high) 2-tuple passed to plt.hist.
               (Name shadows the builtin; kept for interface compatibility.)
        take_log: if True, plot log10 of the entries.
        log_th: log10 threshold below which entries are clamped into the first
                bin (new keyword with a default — previously this name was read
                as an unbound free variable, a NameError at call time).
        stacked: stack the per-WG histograms.
        labels: legend labels (avoids the mutable-default-argument pitfall).
        legend: draw the legend if True.
    """
    if labels is None:
        labels = []
    title = ""
    # BUGFIX: the log10 transformation is now guarded by take_log; previously
    # it ran unconditionally (and crashed on the undefined log_th).
    if take_log:
        safe_log = lambda rate: log10(rate) if rate > float(f'1e{log_th}') else log_th - 1
        title = f"(all values <= log10(1e{log_th}) are in the first bin)"
        if stacked:
            # entry_list is a list of lists
            entry_list = [[safe_log(rate) for rate in lst]
                          for lst in entry_list]
        else:
            entry_list = [safe_log(rate) for rate in entry_list]
    fig = plt.figure()
    if range:
        # If specified, range should be a 2-tuple of floats (low, high)
        plt.hist(entry_list, nbins, range=range, stacked=stacked, label=labels)
    else:
        plt.hist(entry_list, nbins, stacked=stacked, label=labels)
    plt.xlabel(xlabel)
    plt.ylabel("Number of lines")
    if title: plt.title(title)
    if legend: plt.legend(loc='upper right')
    plt.yscale('log', nonpositive='clip')
    plt.savefig(plot_path, format="png")
    plt.close(fig)
def list_of_other_lines(process, all_lines_bw_info):
    """Return the names of lines whose WG could not be identified ("Other")."""
    unidentified = []
    for line_name in all_lines_bw_info:
        if guess_wg(line_name, process) == "Other":
            unidentified.append(line_name)
    return unidentified
def make_plots(all_lines_bw_info: dict[str, LineRateBWInfo],
               fname_helper: FileNameHelper, stream_config: str):
    '''Make plots of rate, bandwidth and event sizes of all lines.

    It will create 5 stacked histograms containing distributions of all lines
    grouped by WG, and a pie chart showing the number of lines per WG.

    all_lines_bw_info: dict(line_name: LineRateBWInfo object)
    fname_helper: instance of FileNameHelper
    stream_config: e.g. "production" or "wg"
    '''
    # BUGFIX: the opening quotes of the docstring above had been lost,
    # leaving the text as bare (syntactically invalid) statements.
    # Count number of lines and rates/evt sizes per WG
    rate_info_per_wg = {
        wg: WGRateBWInfo()
        for wg in KNOWN_WORKING_GROUPS + ["Other"]
    }
    for line, bw_info in all_lines_bw_info.items():
        wg_guess = guess_wg(line, fname_helper.process)
        rate_info_per_wg[wg_guess].nlines += 1
        for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
            getattr(rate_info_per_wg[wg_guess], attrib).append(
                getattr(bw_info, attrib))
    # Drop WGs with no lines so they don't clutter the legends/pie chart.
    rate_info_per_wg = {
        k: info
        for k, info in rate_info_per_wg.items() if info.nlines != 0
    }
    # Make a pie chart of lines per WG
    labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
    pie = plt.pie([v.nlines for v in rate_info_per_wg.values()],
                  radius=1,
                  wedgeprops=dict(width=0.4, edgecolor="w"))
    plt.legend(
        pie[0],
        labels,
        loc='center',
        bbox_to_anchor=(1, 0.5),
        bbox_transform=plt.gcf().transFigure)
    plt.title(f"Number of {fname_helper.process.capitalize()} lines per WG")
    plt.savefig(
        fname_helper.pie_chart_path(stream_config, full_path=True),
        format="png",
        bbox_inches='tight')
    # Stacked histograms
    for attrib, xtitle, plot_bit, log_th, range in zip(
        ["rate", "tot_size", "tot_bw", "dst_size", "dst_bw"], [
            "Log10(Rate [Hz])", "Total Event Size [kB]",
            "Log10(Bandwidth from Total Event Size [GB/s])",
            "DstData RawBank Size [kB]",
            "Log10(Bandwidth from DstData Size [GB/s])"
        ], MAIN_HISTOGRAMS + EXTRA_HISTOGRAMS, [-1, 0, -4, 0, -4],
        [(-2, 7),
         (0, 500 if fname_helper.process == 'hlt2' else 1000), (-5, 2),
         (0, 500 if fname_helper.process == 'hlt2' else 1000), (-5, 2)]):
        # BUGFIX: forward `range` to histo_maker (it was unpacked from the zip
        # but never used, so the hand-picked axis ranges were silently ignored).
        # NOTE(review): `log_th` is unpacked here but histo_maker's visible
        # signature does not accept it — confirm intended wiring upstream.
        histo_maker(
            [getattr(info, attrib) for info in rate_info_per_wg.values()],
            xtitle,
            fname_helper.hist_path(stream_config, plot_bit, full_path=True),
            range=range,
            take_log="Log10" in xtitle,
            stacked=True,
            legend=True,
            labels=list(rate_info_per_wg.keys()))
    return
def _important_bar_chart_maker(bandwidths: dict[str, dict[str, float]],
                               process: str,
                               stream_config="",
                               is_to_total_to_disk_bar_chart=True):
    """Draw the headline "Current vs TDR" per-stream bandwidth bar chart.

    Args:
        bandwidths: {"Current": {stream: GB/s}, "TDR": {stream: GB/s}};
            both inner dicts are expected to share the same keys (incl. 'Total').
        process: 'hlt2' or 'spruce' (selects title and output path helper).
        stream_config: needed for the per-test headline chart path.
        is_to_total_to_disk_bar_chart: True for the all-streams to-disk summary,
            False for a single sub-test's headline chart.
    """
    fname_helper = FileNameHelper(process)
    colors = {'Current': 'tab:orange', 'TDR': 'tab:grey'}
    width = 0.4
    fig, ax = plt.subplots()
    plt.grid(True, axis='y', zorder=0, linestyle='dashed')
    for i_col, (label, bandwidths_by_stream) in enumerate(bandwidths.items()):
        offset = width * i_col
        bars = ax.bar([x + offset for x in range(len(bandwidths_by_stream))],
                      bandwidths_by_stream.values(),
                      width=width,
                      label=label,
                      zorder=3,
                      color=colors[label])
        if process == "spruce" and not is_to_total_to_disk_bar_chart:
            # Only label the last bar - dont have per-WG expectations
            ax.bar_label(
                bars,
                labels=[''] * (len(bars) - 1) +
                [round(bandwidths_by_stream['Total'], 2)])
        else:
            ax.bar_label(
                bars,
                labels=[
                    round(val, 2) for val in bandwidths_by_stream.values()
                ])
    ax.set_ylabel('Bandwidth (GB/s)')
    if is_to_total_to_disk_bar_chart:
        title = "Sprucing (output to disk)"
    else:
        title = {
            "hlt2": "Hlt2 (output to tape)",
            "spruce":
            f"Sprucing of {MAP_STREAMS[stream_config]} stream to disk"
        }[process]
    ax.set_title(title)
    # Have to do weird stuff with ticks for (spruce and not important chart) as only have 1 TDR bar
    tick_pos_opt = 'weird' if process == 'spruce' and not is_to_total_to_disk_bar_chart else 'default'
    # BUGFIX: the `tick_positions = {` assignment line had been lost, leaving a
    # dangling dict literal (syntax error). Note this intentionally reuses
    # `bandwidths_by_stream` from the last loop iteration — both series share
    # the same keys.
    tick_positions = {
        'default': [x + width / 2 for x in range(len(bandwidths_by_stream))],
        'weird': [x for x in range(len(bandwidths_by_stream) - 1)] +
        [len(bandwidths_by_stream) - 1 + width / 2]
    }[tick_pos_opt]
    ax.set_xticks(tick_positions, bandwidths_by_stream.keys())
    ax.legend(loc='upper center', ncols=2)
    plot_path = fname_helper.to_disk_bar_chart_path(
        full_path=True
    ) if is_to_total_to_disk_bar_chart else fname_helper.headline_bar_chart_path(
        stream_config, full_path=True)
    plt.savefig(plot_path, format="png")
    plt.close(fig)
# NOTE(review): lines here were lost to file corruption (replaced by
# blame-gutter line numbers 470-503); content not recoverable from this copy.
def headline_bar_charts(fname_helper: FileNameHelper, stream_config: str):
    """Headline bar chart of rate/bandwidth per stream c.f. TDR"""
    process = fname_helper.process
    rates_df = pd.read_csv(
        fname_helper.final_rate_table_all_streams_path(
            stream_config, ext='csv'))
    streams = rates_df['Stream'].to_list()
    tdr_expectations = TDR_BANDWIDTHS[process][stream_config]
    # Measured bandwidths per stream, and the TDR expectation for each
    # (0 when the TDR quotes no number for that stream).
    raw_series = {
        "Current": dict(zip(streams, rates_df['Total Bandwidth (GB/s)'])),
        "TDR": {stream: tdr_expectations.get(stream, 0)
                for stream in streams},
    }
    # Re-key both series with the human-readable stream names.
    bandwidths = {
        series: {
            PRETTY_STREAM_NAMES.get(stream, stream): val
            for stream, val in by_stream.items()
        }
        for series, by_stream in raw_series.items()
    }
    bandwidths['Current']['Total'] = sum(bandwidths['Current'].values())
    bandwidths['TDR']['Total'] = tdr_expectations['total']
    _important_bar_chart_maker(
        bandwidths,
        process,
        stream_config,
        is_to_total_to_disk_bar_chart=False)
def _make_bar_chart(rates_df, column, stream, plot_path):
    """Bar charts of the WG-by-WG rates within 1 stream"""
    figure = plt.figure()
    plt.grid(True, axis='y', zorder=0, linestyle='dashed')
    wg_bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
    rounded = [round(value, 2) for value in rates_df[column]]
    plt.bar_label(wg_bars, labels=rounded)
    plt.ylabel(column)
    plt.xticks(rates_df['WG'], rates_df['WG'], rotation='vertical')
    plt.subplots_adjust(bottom=0.25)  # leave room for the vertical WG labels
    plt.title(f'{column} for each WG in the {stream.capitalize()} stream')
    plt.savefig(plot_path, format="png")
    plt.close(figure)
def make_per_wg_bar_charts(fname_helper: FileNameHelper, stream_config: str,
                           streams: list[str]):
    """Draw the per-WG rate/bandwidth bar charts for every stream; streams
    whose intra-stream rate table is empty are skipped with a message."""
    chart_specs = {**MAIN_BAR_CHARTS, **EXTRA_BAR_CHARTS}
    for stream in streams:
        print(f"Making per-WG bar charts for {stream}")
        try:
            wg_rates = pd.read_csv(
                fname_helper.tmp_rate_table_intra_stream_path(
                    stream_config, stream),
                header=None)
        except pd.errors.EmptyDataError:
            print(f"Per-WG bar charts: skipping {stream} as no rates found")
            continue
        # NOTE beware if the ordering of the columns ever changes in line-and-stream-rates.py
        wg_rates.columns = ['WG'] + list(chart_specs.values())
        for metric, column in chart_specs.items():
            _make_bar_chart(
                wg_rates, column, stream,
                fname_helper.bar_chart_path(
                    stream_config, stream, metric, full_path=True))
    return
def write_html_page(page_path, rendered_html):
    """Write rendered_html to page_path; does nothing for empty/None content."""
    if not rendered_html:
        return
    with open(page_path, "w") as html_file:
        html_file.write(rendered_html)
def _render(html_str):
    """Render html_str as a Jinja2 template with an empty context."""
    template = jinja2.Template(html_str)
    return template.render()
def render_all_lines_page(fname_helper, stream_config, building_locally):
    """Render the page holding the rate/size/bandwidth table of every line.

    Combines an explanatory header (with a link to the csv version of the
    table) with the pre-built HTML table on disk. Returns the rendered HTML.
    BUGFIX: the body of the `with` block had been displaced by line-number
    artifacts; the read of the rate table is re-attached here.
    """
    csv_path = fname_helper.final_rate_table_all_lines_path(
        stream_config, "csv", full_path=False)
    html_str = f"""
    <p>
    Rates, event sizes and bandwidths of all lines, listed descending in bandwidth. <br>
    Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br>
    Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br>
    These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a>
    </p>
    """
    with open(
            fname_helper.final_rate_table_all_lines_path(
                stream_config, "html"), "r") as rate_table:
        html_str += rate_table.read()
    return _render(html_str)
def render_other_line_table(process, lines):
    """Render a one-column table listing the lines categorized as "Other";
    empty output for HLT1 (no WG categorization there)."""
    if process == "hlt1":
        return _render("")
    header = """
    <p>
    List of line names that categorized to "Others".
    </p>
    """
    rows = [
        r'''<table border = "1">
    <tr>
    <th> Name </th>
    </tr>'''
    ]
    for line in lines:
        rows.append(f'''
    <tr>
    <td> {line} </td>
    </tr>''')
    return _render(header + ''.join(rows) + '\n</table>')
def render_dst_data_hists(fname_helper: FileNameHelper, stream_config: str):
    """Render <object> tags embedding the DstData histograms; empty for HLT1
    (which has no DstData raw bank plots)."""
    if fname_helper.process == "hlt1":
        return _render("")
    pieces = [
        f"""
    <object type="image/png" data="{fname_helper.hist_path(stream_config, hist_suffix, full_path=False)}"></object>
    """ for hist_suffix in EXTRA_HISTOGRAMS
    ]
    return _render(''.join(pieces))
def render_lines_pie_chart(fname_helper: FileNameHelper, stream_config: str):
    """Render the embedded lines-per-WG pie chart with its caption; empty for
    HLT1 (no WG categorization)."""
    if fname_helper.process == "hlt1":
        return _render("")
    chart_path = fname_helper.pie_chart_path(stream_config, full_path=False)
    page = f"""
    <p>
    <b>The number of selection lines per working group:</b> <br>
    </p>
    <object type="image/png" data="{chart_path}"></object>
    <p>
    "Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br>
    To make lines properly categorized, one should follow the naming convention -
    name of lines should start with `Hlt2/Spruce[WG]_`.
    </p>
    """
    return _render(page)
def render_bar_charts(fname_helper: FileNameHelper,
                      stream_config: str,
                      streams: list[str],
                      metrics=MAIN_BAR_CHARTS.keys()):
    """Render the per-WG bar-chart sections (one per metric, embedding one
    chart per stream) with their explanatory captions.

    Only produced for HLT2 — returns empty HTML for any other process.
    `metrics` defaults to the main rate/bandwidth charts; the "extra" page
    passes EXTRA_BAR_CHARTS.keys() instead (see make_html_for_single_test).
    """
    if fname_helper.process != "hlt2":
        return _render("")
    html_str = ''
    for metric in metrics:
        html_str += f'''
        <p>
        <b>{metric.capitalize()} within each stream:</b>
        </p>
        <p>
        "TotalInclusive" is the physical rate/bandwidth of the stream. "SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive".<br>
        The difference between the two bars gives us information about the degree of WG-by-WG overlap.
        </p>
        '''
        # Extra caveat only applies to the production streaming configuration.
        if stream_config == "production":
            html_str += f'''
            <p>
            <b>Note:</b> The WG bars in the HLT2 Turbo stream correspond almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
            Furthermore, this means <b>the "SumWGs" {metric} bar of HLT2 turbo is approximately equal to the total physical {metric} of Turbo post-sprucing.</b><br>
            </p>
            '''
        # One embedded chart per stream for this metric.
        for stream in streams:
            html_str += f'''
            <object type="image/png" data="{fname_helper.bar_chart_path(stream_config, stream, metric, full_path=False)}"></object>
            '''
        html_str += '''
        <p>
        Rates for a WG within a stream are calculated by counting the number of events saved to that stream in which at least 1 of that WG's lines fired.<br>
        Bandwidths for a WG are calculated by summing the event size of all events saved to the stream in which at least 1 of that WG's lines fired.<br>
        </p>
        '''
    return _render(html_str)
# Shared explanatory header for the similarity/overlap matrix pages
# (used by render_sim_matrices_page and render_extra_sim_matrices).
# Fixes user-facing typos: "w.r.t to" -> "w.r.t." and "how much events" ->
# "how many events".
SIM_MATRICES_DESCR = """
<p>
The overlap between two streams, A and B, w.r.t. one of the streams, A, is computed as |A n B| / |A|.
It shows how many events in the stream A are covered by another stream B. <br>
The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t. the column streams. <br>
</p>
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
"""
def render_sim_matrices_page(fname_helper: FileNameHelper, stream_config: str):
    """Render the inter-stream overlap and Jaccard-similarity matrix page;
    empty for HLT1."""
    if fname_helper.process == "hlt1":
        return _render("")
    with open(fname_helper.overlap_matrix_path(stream_config), "r") as overlap:
        overlap_table = overlap.read()
    with open(fname_helper.jaccard_similarities_path(stream_config),
              "r") as jaccard:
        jaccard_table = jaccard.read()
    html_str = SIM_MATRICES_DESCR + f"""
    <p>
    The overlap matrix of the {stream_config} streams is:
    </p>
    """ + overlap_table + f"""
    <p>
    The Jaccard similarity matrix of the {stream_config} streams is:
    </p>
    """ + jaccard_table
    return _render(html_str)
def render_extra_sim_matrices(fname_helper: FileNameHelper, stream_config: str,
                              streams: list[str]):
    """Render the per-stream (WG-by-WG) overlap and Jaccard matrices page;
    HLT2 only — empty HTML otherwise."""
    if fname_helper.process != "hlt2":
        return _render("")
    sections = [SIM_MATRICES_DESCR]
    for stream in streams:
        sections.append(f"""
        <p>
        The overlap matrix of the {stream.capitalize()} stream is:
        </p>
        """)
        with open(
                fname_helper.intra_stream_overlap_matrix_path(
                    stream_config, stream), "r") as overlap:
            sections.append(overlap.read())
        sections.append(f"""
        <p>
        The Jaccard similarity matrix of the {stream.capitalize()} stream is:
        </p>
        """)
        with open(
                fname_helper.intra_stream_jaccard_similarities_path(
                    stream_config, stream), "r") as jaccard:
            sections.append(jaccard.read())
    return _render(''.join(sections))
def list_of_links_html(fname_helper: FileNameHelper, stream_config: str,
                       building_locally: bool):
    """Build the newline-joined <li> link list for a single-test page.

    The all-lines rate table link is always present; stream/WG breakdowns and
    matrix pages are added for non-HLT1 processes, and the extra DstData bar
    charts / intra-stream matrices for HLT2 only.
    BUGFIX: the list literal and several `links.append(...)` calls had lost
    their closing brackets/parentheses (plus a stray blame artifact);
    reconstructed here.
    """
    base_path = fname_helper.base_html_path(building_locally)
    _all_rates_path = fname_helper.all_rates_html_page_path(
        stream_config, full_path=False)
    links = [
        f"""<li><a href="{base_path}/{_all_rates_path}"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
    ]
    if fname_helper.process != "hlt1":
        _rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
            stream_config, full_path=False)
        links.append(
            f"""<li><a href="{base_path}/{_rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>"""
        )
        _rate_table_by_stream_by_wg = fname_helper.final_rate_table_all_lines_split_by_stream_by_wg_path(
            stream_config, full_path=False)
        links.append(
            f"""<li><a href="{base_path}/{_rate_table_by_stream_by_wg}"> Rate/bandwidth tables for each stream, split also by WG, with 1 row per trigger line</a></li>"""
        )
        _sim_matrices_path = fname_helper.sim_matrices_html_page_path(
            stream_config, full_path=False)
        links.append(
            f"""<li><a href="{base_path}/{_sim_matrices_path}"> Jaccard similarity and overlap matrices between streams</a></li>"""
        )
    if fname_helper.process == "hlt2":
        _extra_bar_charts_path = fname_helper.extra_bar_charts_html_page_path(
            stream_config, full_path=False)
        _extra_sim_matrices_path = fname_helper.extra_sim_matrices_html_page_path(
            stream_config, full_path=False)
        links += [
            f"""<li><a href="{base_path}/{_extra_bar_charts_path}">Bar charts as below for DstData bandwidth</a></li>""",
            f"""<li><a href="{base_path}/{_extra_sim_matrices_path}">Similarity and overlap matrices between WGs within each stream</a></li>""",
        ]
    if fname_helper.process != "hlt1":
        _other_lines_path = fname_helper.other_lines_html_page_path(
            stream_config, full_path=False)
        _line_descr_path = fname_helper.line_descr_path(
            stream_config, full_path=False)
        links += [
            f"""<li><a href="{base_path}/{_other_lines_path}">List of lines in "Other" category</a></li>""",
            f"""<li><a href="{base_path}/{_line_descr_path}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>"""
        ]
    return "\n".join(links)
def total_rate_and_bw(fname_helper: FileNameHelper, stream_config: str):
streams_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(
stream_config, ext="csv"))
return sum(streams_df['Rate (kHz)']), sum(
streams_df['Total Bandwidth (GB/s)'])
def write_message(fname_helper: FileNameHelper, stream_config: str,
                  bw_info_by_line: dict[str, LineRateBWInfo]):
    """Append to message.json for the BandwidthTestHandler to send info to Gitlab and Mattermost"""
    # Per-process threshold (Hz) above which a line counts as "high rate".
    high = {
        'hlt1': 1e6,
        'hlt2': 1000,
        'spruce': 500
    }[fname_helper.process]  # Hz
    line_rates = [info.rate for info in bw_info_by_line.values()]
    n_low_rate = sum(1 for rate in line_rates if rate == 0)
    n_high_rate = sum(1 for rate in line_rates if rate > high)
    tot_rate, tot_bandwidth = total_rate_and_bw(fname_helper, stream_config)
    # load up message.json
    with open(fname_helper.message_path(), "r") as message:
        info = json.load(message)
    entry = info[fname_helper.process][stream_config]
    entry["total_rate"] = tot_rate
    entry["total_bandwidth"] = tot_bandwidth
    entry["n_low_rate"] = n_low_rate
    entry["n_high_rate"] = n_high_rate
    with open(fname_helper.message_path(), "w") as f:
        json.dump(info, f, indent=4)
    return 0
def make_html_for_single_test(process: str, stream_config: str,
                              input_config_path: str,
                              args: argparse.Namespace):
    """Produce all plots and HTML pages for one (process, stream_config) test.

    Reads the streaming-config JSON and the all-lines rate csv, draws the bar
    charts/histograms, writes the auxiliary pages and the main index page, and
    finally appends the summary numbers to message.json.
    BUGFIXES: the closing brace of the rate_bw_info_by_line comprehension was
    missing; make_plots was called without its required per-line info; and the
    `with open(...)` writing the main page had been split apart by line-number
    artifacts — all reconstructed here.
    """
    fname_helper = FileNameHelper(process)
    with open(fname_helper.stream_config_json_path(stream_config),
              "r") as stream_config_json:
        streams = list(json.load(stream_config_json).keys())
    ### Make plots & tables
    # Headline bar charts
    if stream_config != "streamless":
        headline_bar_charts(fname_helper, stream_config)
    if process == 'hlt2':
        make_per_wg_bar_charts(fname_helper, stream_config, streams)
    df = pd.read_csv(
        fname_helper.final_rate_table_all_lines_path(stream_config, "csv"),
        sep=',')
    kHz_to_Hz = 1000
    rate_bw_info_by_line = {
        df['Line'][i]: LineRateBWInfo(
            df['Rate (kHz)'][i] * kHz_to_Hz, df["Avg DstData Size (kB)"][i],
            df["Avg Total Event Size (kB)"][i],
            df["DstData Bandwidth (GB/s)"][i], df["Total Bandwidth (GB/s)"][i])
        for i in range(len(df))
    }
    make_plots(
        all_lines_bw_info=rate_bw_info_by_line,
        fname_helper=fname_helper,
        stream_config=stream_config)
    # Extra pages
    write_html_page(
        fname_helper.other_lines_html_page_path(stream_config, full_path=True),
        render_other_line_table(
            process, list_of_other_lines(process, rate_bw_info_by_line)))
    write_html_page(
        fname_helper.all_rates_html_page_path(stream_config, full_path=True),
        render_all_lines_page(fname_helper, stream_config,
                              args.building_locally))
    write_html_page(
        fname_helper.sim_matrices_html_page_path(
            stream_config, full_path=True),
        render_sim_matrices_page(fname_helper, stream_config))
    write_html_page(
        fname_helper.extra_bar_charts_html_page_path(
            stream_config, full_path=True),
        render_bar_charts(
            fname_helper,
            stream_config,
            streams,
            metrics=EXTRA_BAR_CHARTS.keys()))
    write_html_page(
        fname_helper.extra_sim_matrices_html_page_path(
            stream_config, full_path=True),
        render_extra_sim_matrices(fname_helper, stream_config, streams))
    # Main page
    with open(
            fname_helper.index_html_page_path(stream_config, full_path=True),
            "w") as html_file:
        html = render_single_test_page(process, stream_config,
                                       input_config_path, streams, args)
        html_file.write(html)
    # Prepare messages to GitLab
    write_message(fname_helper, stream_config, rate_bw_info_by_line)
    return
def total_bw_to_disk_bar_chart(stream_configs: list[str]):
    """Summary bar chart of measured vs TDR bandwidth to disk, summed over all
    sprucing stream configs (keyed by their pretty stream names)."""
    disk_process = "spruce"
    fname_helper = FileNameHelper(disk_process)
    tdr_by_config = TDR_BANDWIDTHS[disk_process]
    measured = {}
    expected = {}
    for cfg in stream_configs:
        stream_name = MAP_STREAMS[cfg]
        measured[stream_name] = total_rate_and_bw(fname_helper, cfg)[1]
        expected[stream_name] = tdr_by_config[cfg]['total']
    measured['Total'] = sum(measured.values())
    expected['Total'] = sum(expected.values())
    bandwidths = {"Current": measured, "TDR": expected}
    print("Summary of bandwidths to disk:")
    print(bandwidths)
    _important_bar_chart_maker(
        bandwidths, disk_process, is_to_total_to_disk_bar_chart=True)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='make_bandwidth_test_page')
    parser.add_argument(
        '--per-test-info',
        type=str,
        nargs='+',
        required=True,
        help=
        "List of strings, each being a colon-separated list corresponding to <process>:<stream_config>:<input_config_yaml_path>"
    )
    parser.add_argument(
        '-s',
        '--script-path',
        type=str,
        required=True,
        help=
        'Path to the top-level testing script that is running/calling this script.'
    )
    parser.add_argument(
        '--building-locally',
        action='store_true',
        help=
        'Makes links between pages work for building the pages locally rather than on the LHCbPR website.'
    )
    args = parser.parse_args()
    processes_and_stream_configs = []
    # Unpack args.per_test_info into process, stream_config, input_config
    for per_test_info in args.per_test_info:
        assert len(
            per_test_info.split(':')
        ) == 3, "per_test_info must be colon-separated list of <process>:<stream_config>:<input_config_yaml_path>"
        process, stream_config, input_config = per_test_info.split(':')
        assert process in ['hlt1', 'hlt2', 'spruce'
                           ], "process must be one of 'hlt1', 'hlt2', 'spruce'"
        # Build all plots/pages for this sub-test before moving to the next.
        make_html_for_single_test(process, stream_config, input_config, args)
        processes_and_stream_configs.append((process, stream_config))
    # Bar chart of total bandwidth to disk
    # Only drawn when every expected sprucing stream config was actually run.
    expected_stream_configs_to_disk = KNOWN_STREAM_CONFIGS_BY_STAGE["spruce"]
    to_disk_stream_configs = [
        stream_config
        for process, stream_config in processes_and_stream_configs
        if process == 'spruce'
    ]
    make_total_bw_to_disk_bar_chart = sorted(
        expected_stream_configs_to_disk) == sorted(to_disk_stream_configs)
    if make_total_bw_to_disk_bar_chart:
        total_bw_to_disk_bar_chart(to_disk_stream_configs)
    # Top-level page
    base_path = FileNameHelper.base_html_path(args.building_locally)
    with open(FileNameHelper.top_level_index_html_path(), "w") as html_file:
        html = render_top_level_page(
            args.script_path,
            base_path,
            processes_and_stream_configs,
            to_disk_bar_chart=make_total_bw_to_disk_bar_chart)
        html_file.write(html)