Newer
Older
###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import argparse
import os
from collections import namedtuple
from dataclasses import dataclass, field
from math import log10
from typing import List

import jinja2
import matplotlib.pyplot as plt
import pandas as pd
import yaml

from PRConfig.bandwidth_helpers import FileNameHelper
# Disable matplotlib's interactive mode; every figure in this file is written
# to disk with plt.savefig rather than shown.
plt.ioff()
# Main index page of the bandwidth test report.
# `{{...}}` placeholders are filled by jinja2 when the page is rendered in the
# `__main__` section of this file. The `$$...$$` tokens are not jinja2 syntax
# and are written out verbatim (presumably substituted later by the publishing
# framework -- TODO confirm).
REPORT_TEMPLATE = jinja2.Template("""
<html>
<head></head>
<body>
<p>
slot.build_id: $$version$$<br>
platform: $$platform$$<br>
hostname: $$hostname$$<br>
cpu_info: $$cpu_info$$<br>
testing script path: {{SCRIPTPATH}}
</p>
<ul>
<li><a href="{{BASE_PATH}}/run.log">Logs</a></li>
</ul>
<p style="color:{{EXIT_CODE_COLOUR}}">
<b>{{EXIT_CODE_SENTENCE}}</b>
</p>
<p>
Results per working group and stream:
<ul>
<li>Inclusive retention and rate</li>
<li>(Jaccard) similarity matrix</li>
<li>Average DstData size and bandwidth</li>
<li>Average event size and bandwidth</li>
</ul>
</p>
<p>
Results per line: all of the above, plus
<ul>
<li>Exclusive retention and rate</li>
<li>Descriptives (whether persistreco and/or extra outputs is enabled)</li>
</ul>
</p>
<p> See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA &amp; DPA Workflow</a> for reference figures regarding bandwidth.</p>
<p>
Input sample information:
<ul>
<li>Config file: {{INPUT_CONFIG_PATH}}</li>
<li>Input rate: {{INPUT_RATE}} kHz</li>
<li>Number of interactions per bunch crossing (ν): {{INPUT_NU}}</li>
<li>Radius of VELO opening: {{INPUT_VELO_RADIUS}} mm</li>
</ul>
</p>
{{HLT2_OR_SPRUCE_TEMPLATE}}
<p>
Other results are shown by plots or tables (in the links) below. <br>
</p>
<object type="image/png" data="lines_per_wg.png"></object>
<p>
The number of selection lines per working group. <br>
"Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br>
To make lines properly categorized, one should follow the naming convention,
name of lines should start with `Hlt2/Spruce[WG]_`.
</p>
<object type="image/png" data="hist__rate.png"></object>
<p>
Distribution of rate of selection lines. <br>
The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page below. <br>
A line is considered to be "problematic" if it has a rate of 0 Hz
or larger than 1 kHz, which requires some attention. <br>
The rates of all lines are listed in a html page attached below. <br>
</p>
<object type="image/png" data="hist__dst_data_size.png"></object>
<p>
Distribution of DstData RawBank size of selection lines. <br>
The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page below.
</p>
<object type="image/png" data="hist__total_size.png"></object>
<p>
Distribution of total event size of selection lines. <br>
The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page below. <br>
A line is considered to be "problematic" if its DstData size or total event size
is larger than 1 MB, which requires some attention. <br>
The event sizes of all lines are listed in a html page attached below. <br>
</p>
<object type="image/png" data="hist__dst_bandwidth.png"></object>
<p>
Distribution of bandwidth computed from DstData RawBank size. <br>
The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page below.
</p>
<object type="image/png" data="hist__tot_bandwidth.png"></object>
<p>
Distribution of bandwidth computed from total event size. <br>
The total distribution is shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page below. <br>
Currently, a line is considered to be "problematic" if its bandwidth from DstData size
is larger than 200 MB/s, which requires some attention. This is a temporary limit. <br>
The event sizes of all lines are listed in a html page attached below. <br>
</p>
<object type="image/png" data="memory_consumption.png"></object>
<p>
Memory consumption as functions of Wall-time. <br>
The virtual memory size is the total amount of memory the process may hypothetically access. <br>
The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
Swap memory is used when RAM is full. <br>
The maximum resident set size usage is $$max_rss$$ GB. <br>
The maximum proportional set size usage is $$max_pss$$ GB. <br>
</p>
<ul>
<li><a href="{{BASE_PATH}}/other_lines.html">Show list of lines in "Other" category</a></li>
<li><a href="{{BASE_PATH}}/plots_per_wg.html">Show plots split by WGs</a></li>
<li><a href="{{BASE_PATH}}/all_rates.html">Show rates, event sizes and bandwidths of all lines</a></li>
<li><a href="{{BASE_PATH}}/similarities_jaccards.html"> Show similarities Jaccards of different stream configurations</a></li>
<li><a href="{{BASE_PATH}}/rates_streaming.html"> Show rates of streams under different configurations</a></li>
<li><a href="{{BASE_PATH}}/{{line_descr}}"> PersistReco and ExtraOutput for selection lines</a></li>
<li><a href="{{BASE_PATH}}/{{rate_table_split_by_wg_stream}}"> Split by working group: rates, event sizes and bandwidths of all lines</a></li>
</ul>
<p> Additional results for HLT2 Bandwidth test (not available for Sprucing test) </p>
<ul>
<li><a href="{{BASE_PATH}}/{{rate_table_split_by_prod_stream}}"> Split by production stream: rates, event sizes and bandwidths of all lines</a></li>
</ul>
</body>
</html>
""")
# HLT2-specific section of the index page: links to the two stream
# configuration files and embeds the production-stream rate table.
# The list items are wrapped in <ul> outside the paragraph, mirroring the
# structure of SPRUCE_REPORT_TEMPLATE.
HLT2_REPORT_TEMPLATE = jinja2.Template("""<p>
The bandwidth test was run under 3 streaming configurations: streamless (all lines written to the same output file), production-stream and wg-stream. <br>
The definition of the production streaming and working-group streaming can be found below.
</p>
<ul>
<li><a href="{{BASE_PATH}}/{{stream_config_json_prod}}">Production-stream configuration</a></li>
<li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li>
</ul>
<p>
The production stream configuration reflects the streaming we will have for data taking. <br>
The rates, event sizes and bandwidths results from production-stream configuration is: <br>
</p>
{{table_5stream_rates}}""")
# Sprucing-specific section of the index page: links to the WG-stream
# configuration file and embeds the WG-stream rate table. Rendered with
# BASE_PATH, stream_config_json_wg and table_wgstream_rates when the script is
# run with --process spruce.
SPRUCE_REPORT_TEMPLATE = jinja2.Template("""<p>
The bandwidth test was run under 2 streaming configurations: streamless and one stream per WG. <br>
The definition of per-WG-stream configuration can be found below.
</p>
<ul>
<li><a href="{{BASE_PATH}}/{{stream_config_json_wg}}">WG-stream configuration</a></li>
</ul>
<p>
The wg-stream configuration is close to what we will have for data taking. <br>
The rates, event sizes and bandwidths results from wg-stream configuration is: <br>
</p>
{{table_wgstream_rates}}""")
# Page body for other_lines.html: a short heading followed by the
# pre-built HTML table passed in as `table_other_lines`.
TABLE_OTHER_LINE_TEMPLATE = jinja2.Template("""
<p>
List of line names that categorized to "Others".
</p>
{{table_other_lines}}
""")
# Page body for plots_per_wg.html: a short heading followed by the
# pre-built HTML snippet passed in as `plots_per_wg`.
PLOTS_PER_WG_TEMPLATE = jinja2.Template("""
<p>
Plots of rates, event sizes and bandwidths for lines, split into different WGs.
</p>
{{plots_per_wg}}
""")
# Header of all_rates.html: explains the per-line table that is appended
# after it and links to the CSV file holding the same numbers.
ALL_RATE_TEMPLATE = jinja2.Template("""
<p>
Rates, event sizes and bandwidths of all lines, listed descending in retention rates. <br>
The results are obtained by a per-event analysing under 5-stream configuration. <br>
These numbers are also saved in a csv file: <a href="{{BASE_PATH}}/{{CSV_PATH}}">{{CSV_PATH}}</a>
</p>
""")
# Working-group names recognised when categorising selection lines.
# Used as the default `wgs` argument of make_plots, where each name is
# matched as a prefix of the WG part of a line name.
known_working_groups = [
"B2CC",
"B2OC",
"BandQ",
"BnoC",
"Calib",
"Calo",
"Charm",
"DPA",
"HLT",
"IFT",
"Luminosity",
"PID",
"QCD",
"QEE",
"RD",
"RTA",
"Simulation",
"SL",
"Tagging",
"Tracking",
]
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
@dataclass
class WGRateBWInfo:
    """Accumulator of per-line quantities for one working group.

    Each list holds one entry per selection line assigned to the group;
    `nlines` counts those lines. Every instance gets its own fresh lists.
    """
    nlines: int = 0  # number of lines assigned to this group
    rate: List[float] = field(default_factory=list)  # per-line rates
    dst_size: List[float] = field(default_factory=list)  # DstData sizes
    tot_size: List[float] = field(default_factory=list)  # total event sizes
    dst_bw: List[float] = field(default_factory=list)  # DstData bandwidths
    tot_bw: List[float] = field(default_factory=list)  # total bandwidths
# Immutable record of the rate, average sizes and bandwidths of one line.
LineRateBWInfo = namedtuple(
    "LineRateBWInfo",
    ("rate", "dst_size", "tot_size", "dst_bw", "tot_bw"),
)
def histo_maker(entry_list,
                xlabel,
                title,
                plot_path,
                nbins=100,
                range=None,
                take_log=False,
                stacked=False,
                labels=None,
                legend=False):
    '''
    Draw a histogram of `entry_list` and save it as a PNG at `plot_path`.

    Arguments:
        entry_list: values to histogram; a list of lists when `stacked`
        xlabel: label of the x axis
        title: plot title; no title is drawn if it ends up empty
        plot_path: output path of the PNG file
        nbins: number of bins
        range: optional 2-tuple of floats (low, high); the name shadows the
            builtin but is kept because callers pass it by keyword
        take_log: histogram log10 of the values, clamping values below 0.1
        stacked: draw one stacked histogram per inner list of `entry_list`
        labels: legend labels, one per inner list; None means no labels
        legend: whether to draw the legend
    '''
    if take_log:

        def safe_log(value):
            # Clamp at 0.1 so that zero values are defined and fall in the
            # first bin instead of raising a math domain error.
            return log10(max(value, 0.1))

        title = f"{title} (all values <= log10(0.1) are in the first bin)"
        if stacked:
            # entry_list is a list of lists
            entry_list = [[safe_log(value) for value in sub_list]
                          for sub_list in entry_list]
        else:
            entry_list = [safe_log(value) for value in entry_list]

    fig = plt.figure()
    # A falsy range is mapped to None, which is matplotlib's default range.
    plt.hist(
        entry_list,
        nbins,
        range=range or None,
        stacked=stacked,
        label=labels)
    plt.xlabel(xlabel)
    plt.ylabel("Number of lines")
    if title:
        plt.title(title)
    if legend:
        plt.legend(loc='upper right')
    plt.yscale('log', nonpositive='clip')
    plt.savefig(plot_path, format="png")
    plt.close(fig)
def make_plots_per_wg(fname_helper, wg_name, wg_bw_info, process):
    '''
    Make plots of rates, event sizes and bandwidths for one WG.

    One histogram is written per quantity, named
    `hist__<quantity>__<wg_name>.png`, at the location given by
    `fname_helper.html_page_outputs_path`.

    Arguments:
        fname_helper: helper providing `html_page_outputs_path(file_name)`
        wg_name: name of the working group
        wg_bw_info: object with list attributes `rate`, `dst_size`,
            `tot_size`, `dst_bw` and `tot_bw` for the lines of this WG
        process: either `hlt2` or `spruce`
    '''
    title = f"{wg_name} {process.capitalize()}"
    # (attribute, x-axis label, file-name bit, take log10?, histogram range)
    plot_specs = [
        ("rate", "Log10(Rate [Hz])", "rate", True, (-2, 7)),
        ("dst_size", "DstData RawBank Size [kB]", "dst_data_size", False,
         None),
        ("tot_size", "Total Event Size [kB]", "total_size", False, None),
        ("dst_bw", "Log10(Bandwidth from DstData Size [MB/s])",
         "dst_bandwidth", True, (-2, 5)),
        ("tot_bw", "Log10(Bandwidth from Total Event Size [MB/s])",
         "tot_bandwidth", True, (-2, 5)),
    ]
    for attrib, xtitle, plot_bit, take_log, hist_range in plot_specs:
        histo_maker(
            getattr(wg_bw_info, attrib),
            xtitle,
            title,
            fname_helper.html_page_outputs_path(
                f"hist__{plot_bit}__{wg_name}.png"),
            range=hist_range,
            take_log=take_log)
def make_plots(all_lines_bw_info,
               tot_rate,
               tot_bandwidth,
               wgs=known_working_groups,
               *,
               process,
               fname_helper=None):
    '''
    Make plots of rates, event sizes and bandwidths of all lines.

    It creates a pie chart of the number of lines per WG, one stacked
    histogram per quantity (split by WG), and the per-WG histograms.

    Arguments:
        all_lines_bw_info: dictionary of line names to an object with
            attributes `rate`, `dst_size`, `tot_size`, `dst_bw`, `tot_bw`
        tot_rate: total rate of all lines, shown in the rate plot title
        tot_bandwidth: total bandwidth, shown in the bandwidth plot title
        wgs: list of working groups to categorize
        process: either `hlt2` or `spruce` (keyword-only)
        fname_helper: helper providing `html_page_outputs_path(file_name)`;
            defaults to `FileNameHelper(process)`

    Returns:
        The names of the WGs that have at least one line (ordered by
        ascending number of lines) and the list of line names that did not
        match any WG.
    '''
    if fname_helper is None:
        fname_helper = FileNameHelper(process)

    attribs = ("rate", "dst_size", "tot_size", "dst_bw", "tot_bw")
    line_prefix = process.capitalize()

    # Count number of lines and collect rates/evt sizes per WG
    rate_info_per_wg = {wg: WGRateBWInfo() for wg in list(wgs) + ["Other"]}
    list_other_lines = []
    for line, bw_info in all_lines_bw_info.items():
        # Expect e.g {Hlt2,Spruce}<WG>_<rest-of-line-name>
        wg_guess = line.split("_")[0].removeprefix(line_prefix)
        # A line belongs to the first WG whose name prefixes the guess;
        # only unmatched lines fall back to (and are listed as) "Other".
        matched_wg = next(
            (wg for wg in rate_info_per_wg if wg_guess.startswith(wg)), None)
        if matched_wg is None:
            matched_wg = "Other"
            list_other_lines.append(line)
        wg_info = rate_info_per_wg[matched_wg]
        wg_info.nlines += 1
        for attrib in attribs:
            getattr(wg_info, attrib).append(getattr(bw_info, attrib))

    # Drop empty WGs and sort the rest by number of lines
    rate_info_per_wg = dict(
        sorted(
            ((wg, info) for wg, info in rate_info_per_wg.items()
             if info.nlines != 0),
            key=lambda item: item[1].nlines))

    # Make a pie plot of lines per WG
    fig = plt.figure()
    plt.pie([info.nlines for info in rate_info_per_wg.values()],
            radius=1,
            labels=[
                f"{wg} ({int(info.nlines)})"
                for wg, info in rate_info_per_wg.items()
            ],
            wedgeprops=dict(width=0.4, edgecolor="w"))
    plt.title(f"Number of {process.capitalize()} lines per WG")
    # Saved next to the histograms, since the index page references all of
    # these images by bare file name.
    plt.savefig(
        fname_helper.html_page_outputs_path("lines_per_wg.png"), format="png")
    plt.close(fig)

    ### Make hist plots
    size_range = (0, 500 if process == 'hlt2' else 1000)
    # (attribute, x-axis label, title, file-name bit, take log10?, range)
    plot_specs = [
        ("rate", "Log10(Rate [Hz])", f"Total Rate: {tot_rate:.2f} kHz",
         "rate", True, (-2, 7)),
        ("dst_size", "DstData RawBank Size [kB]", "", "dst_data_size", False,
         size_range),
        ("tot_size", "Total Event Size [kB]", "", "total_size", False,
         size_range),
        ("dst_bw", "Log10(Bandwidth from DstData Size [MB/s])", "",
         "dst_bandwidth", True, (-2, 5)),
        ("tot_bw", "Log10(Bandwidth from Total Event Size [MB/s])",
         f"Total bandwidth: {tot_bandwidth:.2f} GB/s", "tot_bandwidth", True,
         (-2, 5)),
    ]
    for attrib, xtitle, title, plot_bit, take_log, hist_range in plot_specs:
        histo_maker(
            [getattr(info, attrib) for info in rate_info_per_wg.values()],
            xtitle,
            title,
            fname_helper.html_page_outputs_path(f"hist__{plot_bit}.png"),
            range=hist_range,
            take_log=take_log,
            stacked=True,
            legend=True,
            labels=list(rate_info_per_wg.keys()))

    for wg_name, bw_info_per_wg in rate_info_per_wg.items():
        make_plots_per_wg(fname_helper, wg_name, bw_info_per_wg, process)

    return rate_info_per_wg.keys(), list_other_lines
def make_other_line_table(name_list):
    '''
    Build a one-column HTML table listing the given line names.

    Arguments:
        name_list: iterable of line names, one table row each

    Returns:
        The HTML of the table as a string; only the header row is present
        when `name_list` is empty.
    '''
    header = '<table border = "1">\n<tr>\n<th> Name </th>\n</tr>'
    rows = [f'\n<tr>\n<td> {name} </td>\n</tr>' for name in name_list]
    return header + ''.join(rows) + '\n</table>'
def make_plots_per_wg_list(wg_list):
    '''
    Build the HTML snippet that embeds the per-WG histograms.

    Arguments:
        wg_list: iterable of working group names

    Returns:
        A string with, for each WG, a heading paragraph followed by the
        five `hist__<quantity>__<wg>.png` images; empty for no WGs.
    '''
    plot_bits = ("rate", "dst_data_size", "total_size", "dst_bandwidth",
                 "tot_bandwidth")
    sections = []
    for wg_name in wg_list:
        objects = ''.join(
            f'<object type="image/png" data="hist__{plot_bit}__{wg_name}.png"></object>\n'
            for plot_bit in plot_bits)
        sections.append(f'\n<p>\nPlots of {wg_name} group:\n</p>\n{objects}')
    return ''.join(sections)
def parse_yaml(file_path):
    '''
    Load a YAML file and return its parsed content.

    Environment variables in `file_path` are expanded before opening.
    '''
    expanded_path = os.path.expandvars(file_path)
    with open(expanded_path, 'r') as yaml_file:
        content = yaml.safe_load(yaml_file)
    return content
if __name__ == '__main__':
    # Script entry point: read the rate tables produced by the earlier steps
    # of the bandwidth test, make the plots, and write the HTML pages plus a
    # short summary message.
    parser = argparse.ArgumentParser(description='make_bandwidth_test_page')
    parser.add_argument(
        '--process',
        type=str,
        choices=['hlt2', 'spruce'],
        required=True,
        help='Which stage was the test run on')
    parser.add_argument(
        '-c',
        '--input-config',
        type=str,
        required=True,
        help='Path to yaml config file defining the input.')
    parser.add_argument(
        '-s',
        '--script-path',
        type=str,
        required=True,
        help=
        'Path to the top-level testing script that is running/calling this script'
    )
    parser.add_argument(
        '-e',
        '--exit-code',
        type=int,
        required=True,
        help="Cumulative exit code of all previous jobs.")
    parser.add_argument(
        '--building-locally',
        action='store_true',
        help=
        'Makes links between pages work for building the pages locally rather than on the LHCbPR website.'
    )
    args = parser.parse_args()

    input_info = parse_yaml(args.input_config)
    fname_helper = FileNameHelper(args.process)
    base_path = fname_helper.base_html_path(args.building_locally)

    if args.exit_code == 0:
        exit_code_sentence = "All sub-jobs in this test exited successfully."
        exit_code_bool = 1
        exit_code_col = "green"
    else:
        exit_code_sentence = "There were errors in some of the sub-jobs of this test; please see the logs."
        exit_code_bool = 0
        exit_code_col = "red"

    # Per-line table; rates are converted to Hz and bandwidths to MB/s to
    # match the axis units used in the plots.
    df = pd.read_csv(
        fname_helper.final_rate_table_all_lines_path("csv"), sep=',')
    GB_to_MB = 1000
    kHz_to_Hz = 1000
    rate_bw_info_by_line = {
        line: LineRateBWInfo(rate * kHz_to_Hz, dst_size, tot_size,
                             dst_bw * GB_to_MB, tot_bw * GB_to_MB)
        for line, rate, dst_size, tot_size, dst_bw, tot_bw in zip(
            df['Line'], df['Rate (kHz)'], df["Avg DstData Size (kB)"],
            df["Avg Total Event Size (kB)"], df["DstData Bandwidth (GB/s)"],
            df["Total Bandwidth (GB/s)"])
    }

    # Prepare messages to GitLab
    # limits on rate: 1 kHz for Hlt2 rate and 0.5% for Sprucing retention
    tol = 1000 if args.process == 'hlt2' else 500
    n_low_rate = sum(
        1 for info in rate_bw_info_by_line.values() if info.rate == 0)
    n_high_rate = sum(
        1 for info in rate_bw_info_by_line.values() if info.rate > tol)

    prod_df = pd.read_csv(
        fname_helper.final_rate_table_all_streams_path(
            "production" if args.process == "hlt2" else "wg", ext="csv"))
    tot_rate = sum(prod_df['Rate (kHz)'])
    tot_bandwidth = sum(prod_df['Total Bandwidth (GB/s)'])

    # Make plots & tables
    wg_list, other_line_list = make_plots(
        rate_bw_info_by_line,
        tot_rate=tot_rate,
        tot_bandwidth=tot_bandwidth,
        fname_helper=fname_helper,
        process=args.process)

    other_line_table = make_other_line_table(other_line_list)
    plots_per_wg = make_plots_per_wg_list(wg_list)

    # Process-specific section of the index page
    if args.process == 'hlt2':
        with open(
                fname_helper.final_rate_table_all_streams_path("production"),
                "r") as rate_html:
            table_5stream_rates = rate_html.read()
        hlt2_or_spruce_template = HLT2_REPORT_TEMPLATE.render(
            BASE_PATH=base_path,
            stream_config_json_prod=fname_helper.stream_config_json_path(
                "production", full_path=False),
            stream_config_json_wg=fname_helper.stream_config_json_path(
                "wg", full_path=False),
            table_5stream_rates=table_5stream_rates)
    elif args.process == 'spruce':
        with open(fname_helper.final_rate_table_all_streams_path("wg"),
                  "r") as rate_html:
            table_wgstream_rates = rate_html.read()
        hlt2_or_spruce_template = SPRUCE_REPORT_TEMPLATE.render(
            BASE_PATH=base_path,
            stream_config_json_wg=fname_helper.stream_config_json_path(
                "wg", full_path=False),
            table_wgstream_rates=table_wgstream_rates)

    with open(fname_helper.html_page_outputs_path("index.html"),
              "w") as html_file:
        html = REPORT_TEMPLATE.render(
            SCRIPTPATH=args.script_path,
            BASE_PATH=base_path,
            HLT2_OR_SPRUCE_TEMPLATE=hlt2_or_spruce_template,
            INPUT_CONFIG_PATH=os.path.expandvars(args.input_config),
            INPUT_RATE=input_info['input_rate'],
            INPUT_NU=input_info['nu'],
            INPUT_VELO_RADIUS=input_info['velo_radial_opening'],
            EXIT_CODE_SENTENCE=exit_code_sentence,
            EXIT_CODE_COLOUR=exit_code_col,
            line_descr=fname_helper.line_descr_path(full_path=False),
            rate_table_split_by_prod_stream=fname_helper.
            final_rate_table_all_lines_split_by_stream_path(
                "production", full_path=False),
            rate_table_split_by_wg_stream=fname_helper.
            final_rate_table_all_lines_split_by_stream_path(
                "wg", full_path=False))
        html_file.write(html)

    with open(fname_helper.html_page_outputs_path("other_lines.html"),
              "w") as html_file:
        html = TABLE_OTHER_LINE_TEMPLATE.render(
            table_other_lines=other_line_table)
        html_file.write(html)

    with open(fname_helper.html_page_outputs_path("plots_per_wg.html"),
              "w") as html_file:
        html = PLOTS_PER_WG_TEMPLATE.render(plots_per_wg=plots_per_wg)
        html_file.write(html)

    # The all-lines page is the rendered header followed by the HTML rate
    # table produced earlier.
    with open(fname_helper.html_page_outputs_path("all_rates.html"),
              "w") as html_file:
        html = ALL_RATE_TEMPLATE.render(
            BASE_PATH=base_path,
            CSV_PATH=fname_helper.final_rate_table_all_lines_path(
                "csv", full_path=False))
        html_file.write(html)
        with open(fname_helper.final_rate_table_all_lines_path("html"),
                  "r") as rate_table:
            html_file.write(rate_table.read())

    stream_configs = ["production", "wg"] if args.process == "hlt2" else ["wg"]

    with open(
            fname_helper.html_page_outputs_path("similarities_jaccards.html"),
            "w") as html_file:
        for stream_config in stream_configs:
            html_file.write(f"""
<p>
The Jaccard similarity matrix (fractional overlap) of the {stream_config} streams is:
</p>
""")
            with open(
                    fname_helper.jaccard_similarities_path(stream_config),
                    "r") as jaccard:
                html_file.write(jaccard.read())

    with open(
            fname_helper.html_page_outputs_path("rates_streaming.html"),
            "w") as html_file:
        for stream_config in stream_configs:
            html_file.write(f"""
<p>
The rates, event sizes and bandwidths of the {stream_config} streams are:
</p>
""")
            with open(
                    fname_helper.final_rate_table_all_streams_path(
                        stream_config), "r") as rate_html:
                html_file.write(rate_html.read())

    # Short machine-readable summary of the test outcome
    with open(fname_helper.html_page_outputs_path("message.txt"),
              "w") as message:
        message.write(f'all_jobs_successful_bool = {exit_code_bool}\n')
        message.write(f'total_rate = {tot_rate:.2f} kHz\n')
        message.write(f'total_bandwidth = {tot_bandwidth:.2f} GB/s\n')
        message.write(f'n_low_rate = {n_low_rate:d}\n')
        message.write(f'n_high_rate = {n_high_rate:d}\n')