###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import argparse
import os
from collections import namedtuple
from dataclasses import dataclass, field
from math import log10
from typing import List

import jinja2
import matplotlib.pyplot as plt
import pandas as pd

from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS
SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
<html>
<head></head>
<body>
{{TOP_LEVEL_HEADER}}
<p style="color:{{EXIT_CODE_COLOUR}}">
<b>{{EXIT_CODE_SENTENCE}}</b>
</p>
This page contains the results of the {{PROCESS}} bandwidth test. Main results: <br>
<object type="image/png" data="{{HEADLINE_BAR_CHART_PATH}}"></object>
{{main_rate_table}}
The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
<a href="{{BASE_PATH}}/{{stream_config_json}}">here</a>. <br>
This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
"DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) are saved. <br>
The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
The total event size (and total bandwidth) count all raw banks (incl. DstData, and detector raw banks if present) in the file.
<b>NB:
In real data-taking, raw banks are now left uncompressed when writing, and then the whole file is compressed afterwards.
We account for this compression by multiplying event sizes and bandwidths by a scaling factor to give accurate per-stream bandwidths.
The scaling factor is calculated for each file in the test as: '(size of the compressed file) / (size of the uncompressed file)'.
</b>
<br>
<p> Scroll down to see: </p>
<ul>
<li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
<li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
<li> Information about the input sample, </li>
<li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics, </li>
<li> Memory consumption of the test as a function of time. </li>
</ul>
<p>
Further results can be found in the links below:
</p>
<ul>
{{LIST_OF_LINKS}}
$${{PROCESS}}__comparison$$
</b></b>
</ul>
<p> See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
{{BAR_CHARTS}}
{{LINES_PER_WG}}
<b>Input sample information:</b>
<ul>
<li>Config file: {{INPUT_CONFIG_PATH}}</li>
<li>Input rate: {{INPUT_RATE}} kHz</li>
<li>Number of interactions per bunch crossing (ν): {{INPUT_NU}}</li>
<li>Radius of VELO opening: {{INPUT_VELO_RADIUS}} mm</li>
</ul>
</p>
<b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
The total distributions are shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page linked above. <br>
Total event size is calculated from summing all raw banks in the file (including DstData) and then multiplying by a per-stream compression factor. <br>
Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank and then multiplying by a per-stream compression factor. <br>
<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
<object type="image/png" data="{{PROCESS}}__hist__rate.png"></object>
<object type="image/png" data="{{PROCESS}}__hist__total_size.png"></object>
{{DST_DATA_HIST}}
{{MEMORY_CONSUMPTION}}
</body>
</html>
""")
HLT2_AND_SPRUCE_REPORT_TEMPLATE = jinja2.Template("""
<html>
<head></head>
<body>
{{TOP_LEVEL_HEADER}}
<p>
The bandwidth test ran an Hlt2 test, and then a Sprucing test on the Full-stream output. <br>
The appropriate process-specific webpages can be found below.
</p>
<ul>
<li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 results</a></li>
<li><a href="{{BASE_PATH}}/spruce__index.html">Sprucing results</a></li>
</ul>
{{MEMORY_CONSUMPTION}}
</body>
</html>""")
@dataclass
class WGRateBWInfo:
nlines: int = 0
rate: List[float] = field(default_factory=lambda: [])
dst_size: List[float] = field(default_factory=lambda: [])
tot_size: List[float] = field(default_factory=lambda: [])
dst_bw: List[float] = field(default_factory=lambda: [])
tot_bw: List[float] = field(default_factory=lambda: [])
LineRateBWInfo = namedtuple(
"LineRateBWInfo", ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"])
def histo_maker(entry_list,
xlabel,
title,
plot_path,
nbins=100,
range=None,
take_log=False,
stacked=False,
labels=[],
legend=False):
if take_log:
safe_log = lambda rate: log10(rate) if rate > float(f'1e{log_th}') else log_th - 1
title = f"{title} (all values <= log10(1e{log_th}) are in the first bin)"
if stacked:
# entry_list is a list of lists
entry_list = [[safe_log(rate) for rate in lst]
for lst in entry_list]
else:
entry_list = [safe_log(rate) for rate in entry_list]
fig = plt.figure()
if range:
# If specified, range should be a 2-tuple of floats (low, high)
plt.hist(entry_list, nbins, range=range, stacked=stacked, label=labels)
else:
plt.hist(entry_list, nbins, stacked=stacked, label=labels)
plt.xlabel(xlabel)
plt.ylabel("Number of lines")
if title: plt.title(title)
if legend: plt.legend(loc='upper right')
plt.yscale('log', nonpositive='clip')
plt.savefig(plot_path, format="png")
plt.close(fig)
def make_plots(all_lines_bw_info, tot_rate, tot_bandwidth, fname_helper,
process):
Make plots of rate, bandwidth and event sizes of all lines.
It will create 5 stacked histograms containing distributions of all lines
grouped by WG, and a pie chart showing the number of lines per WG.
all_lines_bw_info: dict(line_name: LineRateBWInfo object)
tot_rate: total rate of all lines (arithmetic sum of stream rates)
tot_bandwidth: total bandwidth of all lines (arithmetic sum of stream BWs)
fname_helper: instance of FileNameHelper
process: `hlt1`, `hlt2` or `spruce`
Returns:
- list of found WGs with >= 1 line
- list of lines that didnt fit into 1 WG
'''
# Count number of lines and rates/evt sizes per WG
rate_info_per_wg = {
wg: WGRateBWInfo()
for wg in KNOWN_WORKING_GROUPS + ["Other"]
}
for line, bw_info in all_lines_bw_info.items():
wg_guess = guess_wg(line, process)
rate_info_per_wg[wg_guess].nlines += 1
if wg_guess == "Other":
list_other_lines.append(line)
for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
getattr(rate_info_per_wg[wg_guess], attrib).append(
getattr(bw_info, attrib))
rate_info_per_wg = {
k: info
for k, info in rate_info_per_wg.items() if info.nlines != 0
}
# Make a pie chart of lines per WG
labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
pie = plt.pie([v.nlines for v in rate_info_per_wg.values()],
radius=1,
wedgeprops=dict(width=0.4, edgecolor="w"))
plt.legend(
pie[0],
labels,
loc='center',
bbox_to_anchor=(1, 0.5),
bbox_transform=plt.gcf().transFigure)
plt.title(f"Number of {process.capitalize()} lines per WG")
plt.savefig(
fname_helper.process_dependent_html_page_outputs_path(
"lines_per_wg.png"),
format="png",
bbox_inches='tight')
# Stacked histograms
title = f"{process.capitalize()}"
for attrib, xtitle, title, plot_bit, take_log, log_th, range in zip(
["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"], [
"Log10(Rate [Hz])", "DstData RawBank Size [kB]",
"Total Event Size [kB]",
"Log10(Bandwidth from DstData Size [GB/s])",
"Log10(Bandwidth from Total Event Size [GB/s])"
], [
f"Total Rate: {tot_rate:.2f} kHz", "", "", "",
f"Total bandwidth: {tot_bandwidth:.2f} GB/s"
], [
"rate", "dst_data_size", "total_size", "dst_bandwidth",
"tot_bandwidth"
], [True, False, False, True, True], [-1, 0, 0, -4, -4],
[(-2, 7), (0, 500 if process == 'hlt2' else 1000),
(0, 500 if process == 'hlt2' else 1000), (-5, 2), (-5, 2)]):
histo_maker(
[getattr(info, attrib) for info in rate_info_per_wg.values()],
xtitle,
title,
fname_helper.process_dependent_html_page_outputs_path(
f"hist__{plot_bit}.png"),
range=range,
take_log=take_log,
stacked=True,
legend=True,
labels=list(rate_info_per_wg.keys()))
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
return list_other_lines
def headline_bar_charts(rates_df: pd.DataFrame, process: str, plot_path: str):
"""Headline bar chart of rate/bandwidth per stream c.f. TDR"""
TDR_BANDWIDTHS = {
"hlt2": {
"full": 5.90,
"turbo": 2.50,
"turcal": 1.60,
"total": 10.00,
},
"spruce": {
"total": 0.80
}
}
TITLES = {
"hlt2": "Hlt2 (output to tape)",
"spruce": "Excl. Sprucing of WG streams to disk"
}
PRETTY_STREAM_NAMES = {
"slepton": "SL",
"qee": "QEE",
"rd": "RD",
"bandq": "B&Q",
"b_to_open_charm": "B2OC",
"bnoc": "BnoC",
"b_to_charmonia": "B2CC",
"full": "Full",
"turbo": "Turbo",
"turcal": "TurCal",
"ift": "IFT"
}
bandwidths = {
"Current":
dict(zip(rates_df['Stream'], rates_df['Total Bandwidth (GB/s)'])),
"TDR": {
stream: TDR_BANDWIDTHS[process].get(stream, 0)
for stream in rates_df['Stream'].to_list()
}
}
for series in ["Current", "TDR"]:
bandwidths[series] = {
PRETTY_STREAM_NAMES.get(stream, stream): val
for stream, val in bandwidths[series].items()
}
bandwidths['Current']['Total'] = sum(bandwidths['Current'].values())
bandwidths['TDR']['Total'] = TDR_BANDWIDTHS[process]['total']
colors = {'Current': 'tab:orange', 'TDR': 'tab:grey'}
width = 0.4
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
fig, ax = plt.subplots()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
for i_col, (label, bandwidths_by_stream) in enumerate(bandwidths.items()):
offset = width * i_col
bars = ax.bar([x + offset for x in range(len(bandwidths_by_stream))],
bandwidths_by_stream.values(),
width=width,
label=label,
zorder=3,
color=colors[label])
if process == "spruce":
# Only label the last bar - dont have per-WG expectations
ax.bar_label(
bars,
labels=[''] * (len(bars) - 1) +
[round(bandwidths_by_stream['Total'], 2)])
else:
ax.bar_label(
bars,
labels=[
round(val, 2) for val in bandwidths_by_stream.values()
])
ax.set_ylabel('Bandwidth (GB/s)')
ax.set_title(TITLES[process])
tick_positions = {
'hlt2': [x + width / 2 for x in range(len(bandwidths_by_stream))],
'spruce': [x for x in range(len(bandwidths_by_stream) - 1)] +
[len(bandwidths_by_stream) - 1 + width / 2]
}[process]
ax.set_xticks(tick_positions, bandwidths_by_stream.keys())
ax.legend(loc='upper center', ncols=2)
plt.savefig(plot_path, format="png")
plt.close(fig)
def make_bar_charts(rates_df, column, stream, plot_path):
"""Bar charts of the WG-by-WG rates within 1 stream"""
fig = plt.figure()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
plt.bar_label(bars, labels=[round(val, 2) for val in rates_df[column]])
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
plt.ylabel(column)
plt.xticks(rates_df['WG'], rates_df['WG'], rotation='vertical')
plt.subplots_adjust(bottom=0.25)
plt.title(f'{column} for each WG in the {stream.capitalize()} stream')
plt.savefig(plot_path, format="png")
plt.close(fig)
def write_html_page(page_path, rendered_html):
if rendered_html:
with open(page_path, "w") as html_file:
html_file.write(rendered_html)
def _render(html_str):
return jinja2.Template(html_str).render()
def render_all_lines_page(fname_helper, building_locally):
csv_path = fname_helper.final_rate_table_all_lines_path(
"csv", full_path=False)
html_str = f"""
<p>
Rates, event sizes and bandwidths of all lines, listed descending in bandwidth. <br>
Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br>
Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br>
These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a>
</p>
"""
with open(fname_helper.final_rate_table_all_lines_path("html"),
"r") as rate_table:
html_str += rate_table.read()
return _render(html_str)
def render_top_level_header(script_path, base_path):
return _render(f"""
<p>
slot.build_id: $$version$$<br>
start time: $$start_time$$<br>
end time: $$end_time$$<br>
platform: $$platform$$<br>
hostname: $$hostname$$<br>
cpu_info: $$cpu_info$$<br>
testing script path: {script_path}
</p>
<ul>
<li><a href="{base_path}/run.log">Logs</a></li>
</ul>
""")
def render_memory_consumption():
return _render("""
<b> Memory consumption of this test: </b>
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
<object type="image/png" data="memory_consumption.png"></object>
<p>
Memory consumption as functions of Wall-time. <br>
The virtual memory size is the total amount of memory the process may hypothetically access. <br>
The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
Swap memory is used when RAM is full. <br>
The maximum resident set size usage is $$max_rss$$ GB. <br>
The maximum proportional set size usage is $$max_pss$$ GB. <br>
</p>
""")
def render_other_line_table(process, lines):
if process == "hlt1":
return _render("")
html_str = """
<p>
List of line names that categorized to "Others".
</p>
"""
html_str += r'''<table border = "1">
<tr>
<th> Name </th>
</tr>'''
for line in lines:
html_str += f'''
<tr>
<td> {line} </td>
</tr>'''
html_str += '\n</table>'
return _render(html_str)
def render_dst_data_hists(process):
if process == "hlt1":
return _render("")
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
html_str = ''
for hist_suffix in ("data_size", "bandwidth"):
html_str += f"""
<object type="image/png" data="{process}__hist__dst_{hist_suffix}.png"></object>
"""
return _render(html_str)
def render_lines_pie_chart(process):
if process == "hlt1":
return _render("")
return _render(f"""
<p>
<b>The number of selection lines per working group:</b> <br>
</p>
<object type="image/png" data="{process}__lines_per_wg.png"></object>
<p>
"Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br>
To make lines properly categorized, one should follow the naming convention -
name of lines should start with `Hlt2/Spruce[WG]_`.
</p>
""")
def render_bar_charts(process,
stream_config,
streams,
metrics=('bandwidth', 'rate')):
if process != "hlt2":
return _render("")
html_str = ''
for metric in metrics:
html_str += f'''
<p>
<b>{metric.capitalize()} within each stream:</b>
</p>
<p>
"TotalInclusive" is the physical rate/bandwidth of the stream. "SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive".<br>
The difference between the two bars gives us information about the degree of WG-by-WG overlap.
</p>
'''
if stream_config == "production":
html_str += f'''
<p>
<b>Note:</b> The WG bars in the HLT2 Turbo stream correspond almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
Furthermore, this means <b>the "SumWGs" {metric} bar of HLT2 turbo is equal to the total physical {metric} of Turbo post-sprucing.</b><br>
</p>
'''
for stream in streams:
html_str += f'''
<object type="image/png" data="{process}__{metric}_bar_chart__{stream_config}__{stream}.png"></object>
'''
html_str += '''
<p>
Rates for a WG within a stream are calculated by counting the number of events saved to that stream in which at least 1 of that WG's lines fired.<br>
Bandwidths for a WG are calculated by summing the event size of all events saved to the stream in which at least 1 of that WG's lines fired.<br>
</p>
'''
return _render(html_str)
def render_extra_sim_matrices(process, stream_config, streams):
if process != "hlt2":
return _render("")
html_str = """
<p>
The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
It shows how much events in the stream A are covered by another stream B. <br>
The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
</p>
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
"""
for stream in streams:
html_str += f"""
<p>
The overlap matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(
fname_helper.intra_stream_overlap_matrix_path(
stream_config, stream), "r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard similarity matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(
fname_helper.intra_stream_jaccard_similarities_path(
stream_config, stream), "r") as jaccard:
html_str += jaccard.read()
return _render(html_str)
def list_of_links_html(process: str, fname_helper: FileNameHelper,
stream_config: str, building_locally: bool):
base_path = fname_helper.base_html_path(building_locally)
links = [
f"""<li><a href="{base_path}/{process}__all_rates.html"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
]
if process != "hlt1":
rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
stream_config, full_path=False)
links.append(
f"""<li><a href="{base_path}/{rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>"""
)
rate_table_by_stream_by_wg = fname_helper.final_rate_table_all_lines_split_by_stream_by_wg_path(
stream_config, full_path=False)
links.append(
f"""<li><a href="{base_path}/{rate_table_by_stream_by_wg}"> Rate/bandwidth tables for each stream, split also by WG, with 1 row per trigger line</a></li>"""
)
links.append(
f"""<li><a href="{base_path}/{process}__similarity_matrices.html"> Jaccard similarity and overlap matrices between streams</a></li>"""
)
if process == "hlt2":
links += [
f"""<li><a href="{base_path}/{process}__extra_bar_charts.html">Bar charts as below for DstData bandwidth</a></li>""",
f"""<li><a href="{base_path}/{process}__extra_similarity_matrices.html">Similarity and overlap matrices between WGs within each stream</a></li>""",
]
if process != "hlt1":
links += [
f"""<li><a href="{base_path}/{process}__other_lines.html">List of lines in "Other" category</a></li>""",
f"""<li><a href="{base_path}/{fname_helper.line_descr_path(full_path=False)}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>"""
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
]
return "\n".join(links)
def render_sim_matrices_page(process, fname_helper, stream_config):
if process == "hlt1":
return _render("")
html_str = f"""
<p>
The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
It shows how much events in the stream A are covered by another stream B. <br>
The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
</p>
<p>
The overlap matrix of the {stream_config} streams is:
</p>
"""
with open(fname_helper.overlap_matrix_path(stream_config), "r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
<p>
The Jaccard similarity matrix of the {stream_config} streams is:
</p>
"""
with open(fname_helper.jaccard_similarities_path(stream_config),
"r") as jaccard:
html_str += jaccard.read()
return _render(html_str)
def _write_message(message,
args,
tot_rate,
tot_bandwidth,
n_low_rate,
n_high_rate,
process_dependent_message=False):
lines = [
f"all_jobs_successful_bool = {1 if args.exit_code == 0 else 0}\n",
f"total_rate = {tot_rate:.2f} kHz\n",
f"total_bandwidth = {tot_bandwidth:.2f} GB/s\n",
f"n_low_rate = {n_low_rate:d}\n", f"n_high_rate = {n_high_rate:d}\n"
]
if process_dependent_message:
lines = [f'{args.process}__{line}' for line in lines]
message.writelines(lines)
return 0
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='make_bandwidth_test_page')
parser.add_argument(
'--process',
type=str,
choices=['hlt1', 'hlt2', 'spruce'],
help='Which stage was the test run on.')

Ross John Hunter
committed
parser.add_argument(
'-c',
'--input-config',
type=str,
required=True,
help='Path to yaml config file defining the input.')
parser.add_argument(
'--stream-config',
type=str,
required=True,
choices=['wg', 'production', 'streamless'],
)
parser.add_argument(
'--streams',
type=str,
nargs='+',
required=True,
help='List of trigger streams.')
parser.add_argument(
'-s',
'--script-path',
type=str,
required=True,
help=
'Path to the top-level testing script that is running/calling this script.'
)
parser.add_argument(
'-e',
'--exit-code',
type=int,
required=True,
help="Cumulative exit code of all previous jobs.")
parser.add_argument(
action='store_true',
help="Flag to use index page appropriate for multiple processes.")
parser.add_argument(
'--skip-top-level-information-for-process-dependent-testpage',
action='store_true',
help=
'Flag to avoid memory-consumption and build information sections of {process}__index page.'
parser.add_argument(
'--building-locally',
action='store_true',
help=
'Makes links between pages work for building the pages locally rather than on the LHCbPR website.'
)
args = parser.parse_args()
input_info = parse_yaml(args.input_config)
fname_helper = FileNameHelper(args.process)
if args.exit_code == 0:
exit_code_sentence = "All sub-jobs in this test exited successfully."
exit_code_bool = 1
exit_code_col = "green"
else:
exit_code_sentence = "There were errors in some of the sub-jobs of this test; please see the logs."
exit_code_bool = 0
exit_code_col = "red"
df = pd.read_csv(
fname_helper.final_rate_table_all_lines_path("csv"), sep=',')
number_of_lines = len(df)
kHz_to_Hz = 1000
rate_bw_info_by_line = {
df['Line'][i]: LineRateBWInfo(
df['Rate (kHz)'][i] * kHz_to_Hz, df["Avg DstData Size (kB)"][i],
df["Avg Total Event Size (kB)"][i],
df["DstData Bandwidth (GB/s)"][i], df["Total Bandwidth (GB/s)"][i])
for i in range(number_of_lines)
}
# Prepare messages to GitLab
# limits on rate: 1 MHz for Hlt1, 1 kHz for Hlt2 rate and 0.5% for Sprucing retention
tol = {'hlt1': 1e6, 'hlt2': 1000, 'spruce': 500}[args.process]
n_low_rate = len(
[info for info in rate_bw_info_by_line.values() if info.rate == 0])
n_high_rate = len(
[info for info in rate_bw_info_by_line.values() if info.rate > tol])
prod_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(
args.stream_config, ext="csv"))
tot_rate = sum(prod_df['Rate (kHz)'])
tot_bandwidth = sum(prod_df['Total Bandwidth (GB/s)'])
# Make plots & tables
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
fname_helper=fname_helper,
process=args.process)
# Headline bar charts
headline_bar_chart_path_for_html = ""
if args.stream_config != "streamless":
main_rate_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(
args.stream_config, ext='csv'))
headline_bar_chart_path_for_html = fname_helper.bar_chart_path(
args.stream_config, 'headline', 'bandwidth', full_path=False)
headline_bar_charts(
main_rate_df, args.process,
fname_helper.bar_chart_path(args.stream_config, 'headline',
'bandwidth'))
# Bar charts within a stream - only relevant for HLT2
if args.process == 'hlt2':
for stream in args.streams:
intra_stream_rates_df = pd.read_csv(
fname_helper.tmp_rate_table_intra_stream_path(
args.stream_config, stream),
header=None)
intra_stream_rates_df.columns = [
'WG', 'Rate (kHz)', 'Bandwidth (GB/s)',
'DstData Bandwidth (GB/s)'
]
for column_header in intra_stream_rates_df.columns[1:]:
fname = {
'Rate (kHz)': "rate",
'Bandwidth (GB/s)': "bandwidth",
'DstData Bandwidth (GB/s)': "dstbandwidth"
}[column_header]
make_bar_charts(
intra_stream_rates_df, column_header, stream,
fname_helper.bar_chart_path(args.stream_config, stream,
fname))
with open(
fname_helper.final_rate_table_all_streams_path(args.stream_config),
"r") as rate_html:
table_main_stream_rates = rate_html.read()
base_path = fname_helper.base_html_path(args.building_locally)
if args.skip_top_level_information_for_process_dependent_testpage:
top_level_header = ""
memory_consumption = ""
else:
top_level_header = render_top_level_header(args.script_path, base_path)
memory_consumption = render_memory_consumption()
with open(
fname_helper.html_page_outputs_path(f"{args.process}__index.html"),
"w") as html_file:
html = SINGLE_PROCESS_REPORT_TEMPLATE.render(
TOP_LEVEL_HEADER=top_level_header,
MEMORY_CONSUMPTION=memory_consumption,
BASE_PATH=base_path,
stream_config_json=fname_helper.stream_config_json_path(
args.stream_config, full_path=False),
main_rate_table=table_main_stream_rates,
BAR_CHARTS=render_bar_charts(args.process, args.stream_config,
args.streams),
HEADLINE_BAR_CHART_PATH=headline_bar_chart_path_for_html,
LIST_OF_LINKS=list_of_links_html(args.process, fname_helper,
args.stream_config,
args.building_locally),
LINES_PER_WG=render_lines_pie_chart(args.process),
DST_DATA_HIST=render_dst_data_hists(args.process),
INPUT_CONFIG_PATH=os.path.expandvars(args.input_config),
INPUT_RATE=input_info['input_rate'],
INPUT_NU=input_info['nu'],
INPUT_VELO_RADIUS=input_info['velo_radial_opening'],
EXIT_CODE_SENTENCE=exit_code_sentence,
EXIT_CODE_COLOUR=exit_code_col,
PROCESS=args.process)
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
# Extra pages
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"other_lines.html"),
render_other_line_table(args.process, other_line_list))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"all_rates.html"),
render_all_lines_page(fname_helper, args.building_locally))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"similarity_matrices.html"),
render_sim_matrices_page(args.process, fname_helper,
args.stream_config))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"extra_bar_charts.html"),
render_bar_charts(
args.process,
args.stream_config,
args.streams,
metrics=['dstbandwidth']))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"extra_similarity_matrices.html"),
render_extra_sim_matrices(args.process, args.stream_config,
args.streams))
with open(fname_helper.html_page_outputs_path("index.html"),
"w") as html_file:
html = HLT2_AND_SPRUCE_REPORT_TEMPLATE.render(
TOP_LEVEL_HEADER=render_top_level_header(
args.script_path, base_path),
BASE_PATH=base_path,
MEMORY_CONSUMPTION=render_memory_consumption())
html_file.write(html)
else:
# In single-process tests, need 'index.html' to be picked up.
with open(
fname_helper.html_page_outputs_path(
f"{args.process}__index.html"),
"r") as process_dependent_html_file:
html_file.write(process_dependent_html_file.read())
with open(
fname_helper.html_page_outputs_path(
f"{args.process}__message.txt"), "w") as message:
_write_message(
message=message,
args=args,
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
n_low_rate=n_low_rate,
n_high_rate=n_high_rate,
process_dependent_message=False)
with open(
fname_helper.html_page_outputs_path("message.txt"),
"a" if args.make_hlt2_and_spruce_page else "w") as message:
_write_message(
message=message,
args=args,
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
n_low_rate=n_low_rate,
n_high_rate=n_high_rate,
process_dependent_message=True)