###############################################################################
# (c) Copyright 2023 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
import argparse
import os
from collections import namedtuple
from dataclasses import dataclass, field
from math import log10
from typing import List

import jinja2
import matplotlib.pyplot as plt
import pandas as pd

from PRConfig.bandwidth_helpers import FileNameHelper, parse_yaml, guess_wg, KNOWN_WORKING_GROUPS
SINGLE_PROCESS_REPORT_TEMPLATE = jinja2.Template("""
<html>
<head></head>
<body>
{{TOP_LEVEL_HEADER}}
<p style="color:{{EXIT_CODE_COLOUR}}">
<b>{{EXIT_CODE_SENTENCE}}</b>
</p>
This page contains the results of the {{PROCESS}} bandwidth test. Main results: <br>
<object type="image/png" data="{{HEADLINE_BAR_CHART_PATH}}"></object>
{{main_rate_table}}
The streaming configuration (i.e. which lines went to each stream) can be found in JSON format
<a href="{{BASE_PATH}}/{{stream_config_json}}">here</a>. <br>
This streaming configuration is our current set of lines to be used in the next data-taking period. <br>
"DstData" is the raw bank to which reconstructed information (candidates, other reconstructed tracks etc.) are saved. <br>
The "DstData bandwidth" is therefore the bandwidth counting only that raw bank. <br>
The total event size (and total bandwidth) count all raw banks (incl. DstData, and detector raw banks if present) in the file.
<b>NB:
In real data-taking, raw banks are now left uncompressed when writing, and then the whole file is compressed afterwards.
We account for this compression by multiplying event sizes and bandwidths by a scaling factor to give accurate per-stream bandwidths.
The scaling factor is calculated for each file in the test as: '(size of the compressed file) / (size of the uncompressed file)'.
</b>
<br>
<p> Scroll down to see: </p>
<ul>
<li> Bar charts of rate and bandwidth for each WG within each stream (HLT2 only), </li>
<li> A pie chart of all lines split by WGs (HLT2 and sprucing only), </li>
<li> Information about the input sample, </li>
<li> Stacked histograms of all lines, split by WG, of rate/bandwidth metrics, </li>
<li> Memory consumption of the test as a function of time. </li>
</ul>
<p>
Further results can be found in the links below:
</p>
<ul>
{{LIST_OF_LINKS}}
$${{PROCESS}}__comparison$$
</b></b>
</ul>
<p> See: <a href="https://lbfence.cern.ch/alcm/public/figure/details/32">RTA & DPA Workflow</a> for reference figures regarding bandwidth.</p>
{{BAR_CHARTS}}
{{LINES_PER_WG}}
<b>Input sample information:</b>
<ul>
<li>Config file: {{INPUT_CONFIG_PATH}}</li>
<li>Input rate: {{INPUT_RATE}} kHz</li>
<li>Number of interactions per bunch crossing (ν): {{INPUT_NU}}</li>
<li>Radius of VELO opening: {{INPUT_VELO_RADIUS}} mm</li>
</ul>
</p>
<b>Stacked histograms of all lines, split by WG, of rate/bandwidth metrics:</b> <br>
The total distributions are shown as a stacked histogram, split into several histograms of WGs. <br>
The distributions per WG is attached in the html page linked above. <br>
Total event size is calculated from summing all raw banks in the file (including DstData) and then multiplying by a per-stream compression factor. <br>
Where appropriate, the DstData raw bank size and DstData bandwidth are calculated from summing only the DstData raw bank and then multiplying by a per-stream compression factor. <br>
<object type="image/png" data="{{PROCESS}}__hist__tot_bandwidth.png"></object>
<object type="image/png" data="{{PROCESS}}__hist__rate.png"></object>
<object type="image/png" data="{{PROCESS}}__hist__total_size.png"></object>
{{DST_DATA_HIST}}
{{MEMORY_CONSUMPTION}}
</body>
</html>
""")
HLT2_AND_SPRUCE_REPORT_TEMPLATE = jinja2.Template("""
<html>
<head></head>
<body>
{{TOP_LEVEL_HEADER}}
<p>
The bandwidth test ran an Hlt2 test, and then a Sprucing test on the Full-stream output. <br>
The appropriate process-specific webpages can be found below.
</p>
<ul>
<li><a href="{{BASE_PATH}}/hlt2__index.html">Hlt2 results</a></li>
<li><a href="{{BASE_PATH}}/spruce__index.html">Sprucing results</a></li>
</ul>
{{MEMORY_CONSUMPTION}}
</body>
</html>""")
@dataclass
class WGRateBWInfo:
nlines: int = 0
rate: List[float] = field(default_factory=lambda: [])
dst_size: List[float] = field(default_factory=lambda: [])
tot_size: List[float] = field(default_factory=lambda: [])
dst_bw: List[float] = field(default_factory=lambda: [])
tot_bw: List[float] = field(default_factory=lambda: [])
LineRateBWInfo = namedtuple(
"LineRateBWInfo", ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"])
def histo_maker(entry_list,
xlabel,
title,
plot_path,
nbins=100,
range=None,
take_log=False,
stacked=False,
labels=[],
legend=False):
if take_log:
safe_log = lambda rate: log10(rate) if rate > float(f'1e{log_th}') else log_th - 1
title = f"{title} (all values <= log10(1e{log_th}) are in the first bin)"
if stacked:
# entry_list is a list of lists
entry_list = [[safe_log(rate) for rate in lst]
for lst in entry_list]
else:
entry_list = [safe_log(rate) for rate in entry_list]
fig = plt.figure()
if range:
# If specified, range should be a 2-tuple of floats (low, high)
plt.hist(entry_list, nbins, range=range, stacked=stacked, label=labels)
else:
plt.hist(entry_list, nbins, stacked=stacked, label=labels)
plt.xlabel(xlabel)
plt.ylabel("Number of lines")
if title: plt.title(title)
if legend: plt.legend(loc='upper right')
plt.yscale('log', nonpositive='clip')
plt.savefig(plot_path, format="png")
plt.close(fig)
def make_plots(all_lines_bw_info, tot_rate, tot_bandwidth, fname_helper,
process):
Make plots of rate, bandwidth and event sizes of all lines.
It will create 5 stacked histograms containing distributions of all lines
grouped by WG, and a pie chart showing the number of lines per WG.
all_lines_bw_info: dict(line_name: LineRateBWInfo object)
tot_rate: total rate of all lines (arithmetic sum of stream rates)
tot_bandwidth: total bandwidth of all lines (arithmetic sum of stream BWs)
fname_helper: instance of FileNameHelper
process: `hlt1`, `hlt2` or `spruce`
Returns:
- list of found WGs with >= 1 line
- list of lines that didnt fit into 1 WG
'''
# Count number of lines and rates/evt sizes per WG
rate_info_per_wg = {
wg: WGRateBWInfo()
for wg in KNOWN_WORKING_GROUPS + ["Other"]
}
for line, bw_info in all_lines_bw_info.items():
wg_guess = guess_wg(line, process)
rate_info_per_wg[wg_guess].nlines += 1
if wg_guess == "Other":
list_other_lines.append(line)
for attrib in ["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"]:
getattr(rate_info_per_wg[wg_guess], attrib).append(
getattr(bw_info, attrib))
rate_info_per_wg = {
k: info
for k, info in rate_info_per_wg.items() if info.nlines != 0
}
# Make a pie chart of lines per WG
labels = [f"{k} ({int(v.nlines)})" for k, v in rate_info_per_wg.items()]
pie = plt.pie([v.nlines for v in rate_info_per_wg.values()],
radius=1,
wedgeprops=dict(width=0.4, edgecolor="w"))
plt.legend(
pie[0],
labels,
loc='center',
bbox_to_anchor=(1, 0.5),
bbox_transform=plt.gcf().transFigure)
plt.title(f"Number of {process.capitalize()} lines per WG")
plt.savefig(
fname_helper.process_dependent_html_page_outputs_path(
"lines_per_wg.png"),
format="png",
bbox_inches='tight')
# Stacked histograms
title = f"{process.capitalize()}"
for attrib, xtitle, title, plot_bit, take_log, log_th, range in zip(
["rate", "dst_size", "tot_size", "dst_bw", "tot_bw"], [
"Log10(Rate [Hz])", "DstData RawBank Size [kB]",
"Total Event Size [kB]",
"Log10(Bandwidth from DstData Size [GB/s])",
"Log10(Bandwidth from Total Event Size [GB/s])"
], [
f"Total Rate: {tot_rate:.2f} kHz", "", "", "",
f"Total bandwidth: {tot_bandwidth:.2f} GB/s"
], [
"rate", "dst_data_size", "total_size", "dst_bandwidth",
"tot_bandwidth"
], [True, False, False, True, True], [-1, 0, 0, -4, -4],
[(-2, 7), (0, 500 if process == 'hlt2' else 1000),
(0, 500 if process == 'hlt2' else 1000), (-5, 2), (-5, 2)]):
histo_maker(
[getattr(info, attrib) for info in rate_info_per_wg.values()],
xtitle,
title,
fname_helper.process_dependent_html_page_outputs_path(
f"hist__{plot_bit}.png"),
range=range,
take_log=take_log,
stacked=True,
legend=True,
labels=list(rate_info_per_wg.keys()))
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
return list_other_lines
def headline_bar_charts(rates_df: pd.DataFrame, process: str, plot_path: str):
"""Headline bar chart of rate/bandwidth per stream c.f. TDR"""
TDR_BANDWIDTHS = {
"hlt2": {
"full": 5.90,
"turbo": 2.50,
"turcal": 1.60,
"total": 10.00,
},
"spruce": {
"total": 0.80
}
}
TITLES = {
"hlt2": "Hlt2 (output to tape)",
"spruce": "Excl. Sprucing of WG streams to disk"
}
PRETTY_STREAM_NAMES = {
"slepton": "SL",
"qee": "QEE",
"rd": "RD",
"bandq": "B&Q",
"b_to_open_charm": "B2OC",
"bnoc": "BnoC",
"b_to_charmonia": "B2CC",
"full": "Full",
"turbo": "Turbo",
"turcal": "TurCal",
"ift": "IFT"
}
bandwidths = {
"Current":
dict(zip(rates_df['Stream'], rates_df['Total Bandwidth (GB/s)'])),
"TDR": {
stream: TDR_BANDWIDTHS[process].get(stream, 0)
for stream in rates_df['Stream'].to_list()
}
}
for series in ["Current", "TDR"]:
bandwidths[series] = {
PRETTY_STREAM_NAMES.get(stream, stream): val
for stream, val in bandwidths[series].items()
}
bandwidths['Current']['Total'] = sum(bandwidths['Current'].values())
bandwidths['TDR']['Total'] = TDR_BANDWIDTHS[process]['total']
colors = {'Current': 'tab:orange', 'TDR': 'tab:grey'}
width = 0.4
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
fig, ax = plt.subplots()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
for i_col, (label, bandwidths_by_stream) in enumerate(bandwidths.items()):
offset = width * i_col
bars = ax.bar([x + offset for x in range(len(bandwidths_by_stream))],
bandwidths_by_stream.values(),
width=width,
label=label,
zorder=3,
color=colors[label])
if process == "spruce":
# Only label the last bar - dont have per-WG expectations
ax.bar_label(
bars,
labels=[''] * (len(bars) - 1) +
[round(bandwidths_by_stream['Total'], 2)])
else:
ax.bar_label(
bars,
labels=[
round(val, 2) for val in bandwidths_by_stream.values()
])
ax.set_ylabel('Bandwidth (GB/s)')
ax.set_title(TITLES[process])
tick_positions = {
'hlt2': [x + width / 2 for x in range(len(bandwidths_by_stream))],
'spruce': [x for x in range(len(bandwidths_by_stream) - 1)] +
[len(bandwidths_by_stream) - 1 + width / 2]
}[process]
ax.set_xticks(tick_positions, bandwidths_by_stream.keys())
ax.legend(loc='upper center', ncols=2)
plt.savefig(plot_path, format="png")
plt.close(fig)
def make_bar_charts(rates_df, column, stream, plot_path):
"""Bar charts of the WG-by-WG rates within 1 stream"""
fig = plt.figure()
plt.grid(True, axis='y', zorder=0, linestyle='dashed')
bars = plt.bar(rates_df['WG'], rates_df[column], zorder=3)
plt.bar_label(bars, labels=[round(val, 2) for val in rates_df[column]])
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
plt.ylabel(column)
plt.xticks(rates_df['WG'], rates_df['WG'], rotation='vertical')
plt.subplots_adjust(bottom=0.25)
plt.title(f'{column} for each WG in the {stream.capitalize()} stream')
plt.savefig(plot_path, format="png")
plt.close(fig)
def write_html_page(page_path, rendered_html):
if rendered_html:
with open(page_path, "w") as html_file:
html_file.write(rendered_html)
def _render(html_str):
return jinja2.Template(html_str).render()
def render_all_lines_page(fname_helper, building_locally):
csv_path = fname_helper.final_rate_table_all_lines_path(
"csv", full_path=False)
html_str = f"""
<p>
Rates, event sizes and bandwidths of all lines, listed descending in bandwidth. <br>
Exclusive retentions/rates are calculated by counting those events in which only that line fired. <br>
Bandwidths are inclusive: they are calculated by summing raw bank sizes for those events in which the trigger line fired. <br>
These numbers are also saved in a csv file: <a href="{fname_helper.base_html_path(building_locally)}/{csv_path}">{csv_path}</a>
</p>
"""
with open(fname_helper.final_rate_table_all_lines_path("html"),
"r") as rate_table:
html_str += rate_table.read()
return _render(html_str)
def render_top_level_header(script_path, base_path):
return _render(f"""
<p>
slot.build_id: $$version$$<br>
start time: $$start_time$$<br>
end time: $$end_time$$<br>
platform: $$platform$$<br>
hostname: $$hostname$$<br>
cpu_info: $$cpu_info$$<br>
testing script path: {script_path}
</p>
<ul>
<li><a href="{base_path}/run.log">Logs</a></li>
</ul>
""")
def render_memory_consumption():
return _render("""
<b> Memory consumption of this test: </b>
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
<object type="image/png" data="memory_consumption.png"></object>
<p>
Memory consumption as functions of Wall-time. <br>
The virtual memory size is the total amount of memory the process may hypothetically access. <br>
The resident set size (RSS) is the portion of memory occupied by the run that is held in main memory (RAM). <br>
The proportional set size (PSS) is the private memory occupied by the run itself plus the proportion of shared memory with one or more other processes. <br>
As we only launch one test at the same time, PSS should be close to RSS in this case, and PSS gives the real memory that is used by this test. <br>
Swap memory is used when RAM is full. <br>
The maximum resident set size usage is $$max_rss$$ GB. <br>
The maximum proportional set size usage is $$max_pss$$ GB. <br>
</p>
""")
def render_other_line_table(process, lines):
if process == "hlt1":
return _render("")
html_str = """
<p>
List of line names that categorized to "Others".
</p>
"""
html_str += r'''<table border = "1">
<tr>
<th> Name </th>
</tr>'''
for line in lines:
html_str += f'''
<tr>
<td> {line} </td>
</tr>'''
html_str += '\n</table>'
return _render(html_str)
def render_dst_data_hists(process):
if process == "hlt1":
return _render("")
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
html_str = ''
for hist_suffix in ("data_size", "bandwidth"):
html_str += f"""
<object type="image/png" data="{process}__hist__dst_{hist_suffix}.png"></object>
"""
return _render(html_str)
def render_lines_pie_chart(process):
if process == "hlt1":
return _render("")
return _render(f"""
<p>
<b>The number of selection lines per working group:</b> <br>
</p>
<object type="image/png" data="{process}__lines_per_wg.png"></object>
<p>
"Other" category contains those lines with a parsed name that doesn't belong to any known WG. <br>
To make lines properly categorized, one should follow the naming convention -
name of lines should start with `Hlt2/Spruce[WG]_`.
</p>
""")
def render_bar_charts(process,
stream_config,
streams,
metrics=('bandwidth', 'rate')):
if process != "hlt2":
return _render("")
html_str = ''
for metric in metrics:
html_str += f'''
<p>
<b>{metric.capitalize()} within each stream:</b>
</p>
<p>
"TotalInclusive" is the physical rate/bandwidth of the stream. "SumWGs" is the simple arithmetic sum of all bars except "TotalInclusive".<br>
The difference between the two bars gives us information about the degree of WG-by-WG overlap.
</p>
'''
if stream_config == "production":
html_str += f'''
<p>
<b>Note:</b> The WG bars in the HLT2 Turbo stream correspond almost exactly to the output streams of the Sprucing passthrough of Turbo.<br>
Furthermore, this means <b>the "SumWGs" {metric} bar of HLT2 turbo is equal to the total physical {metric} of Turbo post-sprucing.</b><br>
</p>
'''
for stream in streams:
html_str += f'''
<object type="image/png" data="{process}__{metric}_bar_chart__{stream_config}__{stream}.png"></object>
'''
html_str += '''
<p>
Rates for a WG within a stream are calculated by counting the number of events saved to that stream in which at least 1 of that WG's lines fired.<br>
Bandwidths for a WG are calculated by summing the event size of all events saved to the stream in which at least 1 of that WG's lines fired.<br>
</p>
'''
return _render(html_str)
def render_extra_sim_matrices(process, stream_config, streams):
if process != "hlt2":
return _render("")
html_str = """
<p>
The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
It shows how much events in the stream A are covered by another stream B. <br>
The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
</p>
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
"""
for stream in streams:
html_str += f"""
<p>
The overlap matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(
fname_helper.intra_stream_overlap_matrix_path(
stream_config, stream), "r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard similarity matrix of the {stream.capitalize()} stream is:
</p>
"""
with open(
fname_helper.intra_stream_jaccard_similarities_path(
stream_config, stream), "r") as jaccard:
html_str += jaccard.read()
return _render(html_str)
def list_of_links_html(process: str, fname_helper: FileNameHelper,
stream_config: str, building_locally: bool):
base_path = fname_helper.base_html_path(building_locally)
links = [
f"""<li><a href="{base_path}/{process}__all_rates.html"> A single rate/bandwidth table featuring every trigger line in all streams</a></li>"""
]
if process != "hlt1":
rate_table_split_by_stream = fname_helper.final_rate_table_all_lines_split_by_stream_path(
stream_config, full_path=False)
links.append(
f"""<li><a href="{base_path}/{rate_table_split_by_stream}"> Rate/bandwidth tables for each stream, with 1 row per trigger line</a></li>"""
)
rate_table_by_stream_by_wg = fname_helper.final_rate_table_all_lines_split_by_stream_by_wg_path(
stream_config, full_path=False)
links.append(
f"""<li><a href="{base_path}/{rate_table_by_stream_by_wg}"> Rate/bandwidth tables for each stream, split also by WG, with 1 row per trigger line</a></li>"""
)
links.append(
f"""<li><a href="{base_path}/{process}__similarity_matrices.html"> Jaccard similarity and overlap matrices between streams</a></li>"""
)
if process == "hlt2":
links += [
f"""<li><a href="{base_path}/{process}__extra_bar_charts.html">Bar charts as below for DstData bandwidth</a></li>""",
f"""<li><a href="{base_path}/{process}__extra_similarity_matrices.html">Similarity and overlap matrices between WGs within each stream</a></li>""",
]
if process != "hlt1":
links += [
f"""<li><a href="{base_path}/{process}__other_lines.html">List of lines in "Other" category</a></li>""",
f"""<li><a href="{base_path}/{fname_helper.line_descr_path(full_path=False)}"> PersistReco and ExtraOutput info for all lines in all streams</a></li>"""
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
]
return "\n".join(links)
def render_sim_matrices_page(process, fname_helper, stream_config):
if process == "hlt1":
return _render("")
html_str = f"""
<p>
The overlap between two streams, A and B, w.r.t to one of the stream, A, is computed as |A n B| / |A|.
It shows how much events in the stream A are covered by another stream B. <br>
The columns in the overlap matrices are target streams (A) and the rows are comparison streams (B),
i.e. the numbers correspond to overlaps w.r.t to the column streams. <br>
</p>
<p>
The overlap matrix of the {stream_config} streams is:
</p>
"""
with open(fname_helper.overlap_matrix_path(stream_config), "r") as overlap:
html_str += overlap.read()
html_str += f"""
<p>
The Jaccard index between two streams, A and B, is computed as |A n B| / |A u B|.
It shows how similar the two streams are and is useful in bandwidth division. <br>
</p>
<p>
The Jaccard similarity matrix of the {stream_config} streams is:
</p>
"""
with open(fname_helper.jaccard_similarities_path(stream_config),
"r") as jaccard:
html_str += jaccard.read()
return _render(html_str)
def _write_message(message,
args,
tot_rate,
tot_bandwidth,
n_low_rate,
n_high_rate,
process_dependent_message=False):
lines = [
f"all_jobs_successful_bool = {1 if args.exit_code == 0 else 0}\n",
f"total_rate = {tot_rate:.2f} kHz\n",
f"total_bandwidth = {tot_bandwidth:.2f} GB/s\n",
f"n_low_rate = {n_low_rate:d}\n", f"n_high_rate = {n_high_rate:d}\n"
]
if process_dependent_message:
lines = [f'{args.process}__{line}' for line in lines]
message.writelines(lines)
return 0
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='make_bandwidth_test_page')
parser.add_argument(
'--process',
type=str,
choices=['hlt1', 'hlt2', 'spruce'],
help='Which stage was the test run on.')

Ross John Hunter
committed
parser.add_argument(
'-c',
'--input-config',
type=str,
required=True,
help='Path to yaml config file defining the input.')
parser.add_argument(
'--stream-config',
type=str,
required=True,
choices=['wg', 'production', 'streamless'],
)
parser.add_argument(
'--streams',
type=str,
nargs='+',
required=True,
help='List of trigger streams.')
parser.add_argument(
'-s',
'--script-path',
type=str,
required=True,
help=
'Path to the top-level testing script that is running/calling this script.'
)
parser.add_argument(
'-e',
'--exit-code',
type=int,
required=True,
help="Cumulative exit code of all previous jobs.")
parser.add_argument(
action='store_true',
help="Flag to use index page appropriate for multiple processes.")
parser.add_argument(
'--skip-top-level-information-for-process-dependent-testpage',
action='store_true',
help=
'Flag to avoid memory-consumption and build information sections of {process}__index page.'
parser.add_argument(
'--building-locally',
action='store_true',
help=
'Makes links between pages work for building the pages locally rather than on the LHCbPR website.'
)
args = parser.parse_args()
input_info = parse_yaml(args.input_config)
fname_helper = FileNameHelper(args.process)
if args.exit_code == 0:
exit_code_sentence = "All sub-jobs in this test exited successfully."
exit_code_bool = 1
exit_code_col = "green"
else:
exit_code_sentence = "There were errors in some of the sub-jobs of this test; please see the logs."
exit_code_bool = 0
exit_code_col = "red"
df = pd.read_csv(
fname_helper.final_rate_table_all_lines_path("csv"), sep=',')
number_of_lines = len(df)
kHz_to_Hz = 1000
rate_bw_info_by_line = {
df['Line'][i]: LineRateBWInfo(
df['Rate (kHz)'][i] * kHz_to_Hz, df["Avg DstData Size (kB)"][i],
df["Avg Total Event Size (kB)"][i],
df["DstData Bandwidth (GB/s)"][i], df["Total Bandwidth (GB/s)"][i])
for i in range(number_of_lines)
}
# Prepare messages to GitLab
# limits on rate: 1 MHz for Hlt1, 1 kHz for Hlt2 rate and 0.5% for Sprucing retention
tol = {'hlt1': 1e6, 'hlt2': 1000, 'spruce': 500}[args.process]
n_low_rate = len(
[info for info in rate_bw_info_by_line.values() if info.rate == 0])
n_high_rate = len(
[info for info in rate_bw_info_by_line.values() if info.rate > tol])
prod_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(
args.stream_config, ext="csv"))
tot_rate = sum(prod_df['Rate (kHz)'])
tot_bandwidth = sum(prod_df['Total Bandwidth (GB/s)'])
# Make plots & tables
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
fname_helper=fname_helper,
process=args.process)
# Headline bar charts
headline_bar_chart_path_for_html = ""
if args.stream_config != "streamless":
main_rate_df = pd.read_csv(
fname_helper.final_rate_table_all_streams_path(
args.stream_config, ext='csv'))
headline_bar_chart_path_for_html = fname_helper.bar_chart_path(
args.stream_config, 'headline', 'bandwidth', full_path=False)
headline_bar_charts(
main_rate_df, args.process,
fname_helper.bar_chart_path(args.stream_config, 'headline',
'bandwidth'))
# Bar charts within a stream - only relevant for HLT2
if args.process == 'hlt2':
for stream in args.streams:
intra_stream_rates_df = pd.read_csv(
fname_helper.tmp_rate_table_intra_stream_path(
args.stream_config, stream),
header=None)
intra_stream_rates_df.columns = [
'WG', 'Rate (kHz)', 'Bandwidth (GB/s)',
'DstData Bandwidth (GB/s)'
]
for column_header in intra_stream_rates_df.columns[1:]:
fname = {
'Rate (kHz)': "rate",
'Bandwidth (GB/s)': "bandwidth",
'DstData Bandwidth (GB/s)': "dstbandwidth"
}[column_header]
make_bar_charts(
intra_stream_rates_df, column_header, stream,
fname_helper.bar_chart_path(args.stream_config, stream,
fname))
with open(
fname_helper.final_rate_table_all_streams_path(args.stream_config),
"r") as rate_html:
table_main_stream_rates = rate_html.read()
base_path = fname_helper.base_html_path(args.building_locally)
if args.skip_top_level_information_for_process_dependent_testpage:
top_level_header = ""
memory_consumption = ""
else:
top_level_header = render_top_level_header(args.script_path, base_path)
memory_consumption = render_memory_consumption()
with open(
fname_helper.html_page_outputs_path(f"{args.process}__index.html"),
"w") as html_file:
html = SINGLE_PROCESS_REPORT_TEMPLATE.render(
TOP_LEVEL_HEADER=top_level_header,
MEMORY_CONSUMPTION=memory_consumption,
BASE_PATH=base_path,
stream_config_json=fname_helper.stream_config_json_path(
args.stream_config, full_path=False),
main_rate_table=table_main_stream_rates,
BAR_CHARTS=render_bar_charts(args.process, args.stream_config,
args.streams),
HEADLINE_BAR_CHART_PATH=headline_bar_chart_path_for_html,
LIST_OF_LINKS=list_of_links_html(args.process, fname_helper,
args.stream_config,
args.building_locally),
LINES_PER_WG=render_lines_pie_chart(args.process),
DST_DATA_HIST=render_dst_data_hists(args.process),
INPUT_CONFIG_PATH=os.path.expandvars(args.input_config),
INPUT_RATE=input_info['input_rate'],
INPUT_NU=input_info['nu'],
INPUT_VELO_RADIUS=input_info['velo_radial_opening'],
EXIT_CODE_SENTENCE=exit_code_sentence,
EXIT_CODE_COLOUR=exit_code_col,
PROCESS=args.process)
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
# Extra pages
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"other_lines.html"),
render_other_line_table(args.process, other_line_list))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"all_rates.html"),
render_all_lines_page(fname_helper, args.building_locally))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"similarity_matrices.html"),
render_sim_matrices_page(args.process, fname_helper,
args.stream_config))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"extra_bar_charts.html"),
render_bar_charts(
args.process,
args.stream_config,
args.streams,
metrics=['dstbandwidth']))
write_html_page(
fname_helper.process_dependent_html_page_outputs_path(
"extra_similarity_matrices.html"),
render_extra_sim_matrices(args.process, args.stream_config,
args.streams))
with open(fname_helper.html_page_outputs_path("index.html"),
"w") as html_file:
html = HLT2_AND_SPRUCE_REPORT_TEMPLATE.render(
TOP_LEVEL_HEADER=render_top_level_header(
args.script_path, base_path),
BASE_PATH=base_path,
MEMORY_CONSUMPTION=render_memory_consumption())
html_file.write(html)
else:
# In single-process tests, need 'index.html' to be picked up.
with open(
fname_helper.html_page_outputs_path(
f"{args.process}__index.html"),
"r") as process_dependent_html_file:
html_file.write(process_dependent_html_file.read())
with open(
fname_helper.html_page_outputs_path(
f"{args.process}__message.txt"), "w") as message:
_write_message(
message=message,
args=args,
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
n_low_rate=n_low_rate,
n_high_rate=n_high_rate,
process_dependent_message=False)
with open(
fname_helper.html_page_outputs_path("message.txt"),
"a" if args.make_hlt2_and_spruce_page else "w") as message:
_write_message(
message=message,
args=args,
tot_rate=tot_rate,
tot_bandwidth=tot_bandwidth,
n_low_rate=n_low_rate,
n_high_rate=n_high_rate,
process_dependent_message=True)