From 86870d8f9c97c2b8264aab451edb628713101217 Mon Sep 17 00:00:00 2001
From: Arthur Hennequin <arthur.hennequin@cern.ch>
Date: Tue, 6 Jun 2023 10:54:27 +0200
Subject: [PATCH] Handle multiple throughput options in a single job

---
 handlers/ThroughputProfileHandler.py | 290 ++++++++++++++-------------
 1 file changed, 153 insertions(+), 137 deletions(-)

diff --git a/handlers/ThroughputProfileHandler.py b/handlers/ThroughputProfileHandler.py
index 482004ef..6cf92feb 100644
--- a/handlers/ThroughputProfileHandler.py
+++ b/handlers/ThroughputProfileHandler.py
@@ -176,6 +176,19 @@ def send_gitlab_feedback(
         remove_labels=remove_labels,
     )
 
+def getOptions(options, log_files):
+    d = set()
+    for f in log_files:
+        name = f.split('/')[-1].split(".")[0]
+        option = "_".join(name.split("_")[1:])
+        d.add(option)
+    if len(d) == 0:
+        return [options], [log_files]
+    options = list(d)
+    all_log_files = []
+    for option in options:
+        all_log_files.append([f for f in log_files if option in f])
+    return options, all_log_files
 
 class ThroughputProfileHandler(BaseHandler):
     def __init__(self):
@@ -208,151 +221,154 @@ class ThroughputProfileHandler(BaseHandler):
             if f.endswith(".log")
         ]
 
-        throughput = sum(get_throughput(f) for f in log_files if "ThroughputTest" in f)
-        str_tput = "{:.1f}".format(throughput)
-        self.saveFloat(
-            "max_throughput",
-            throughput,
-            description="maximum throughput",
-            group="throughput",
-        )
-
-        # measure the total bandwidth
-        # only for sprucing for now
-        measure_bandwidth = options in ['Moore_spruce_all_lines']
-        if measure_bandwidth:
-            run_log = os.path.join(directory, "run.log")
-            if not os.path.isfile(run_log):
-                log.warning('There is no run.log!')
-                measure_bandwidth = False
-        bandwidth = get_bandwidth(run_log) if measure_bandwidth else 0.
-
-        dirname = (
-            f"Throughput_{version}_{options}_{platform}_{startTime.replace(' ', '_')}"
-        )
-        targetRootWebDir = os.path.join(WWW_BASE_URL, dirname)
+        options, log_files = getOptions(options, log_files)
 
-        # concatenate log files into one file
-        with open("tests.log", "w") as outfile:
-            for fname in log_files:
-                outfile.write(
-                    "\n{sep}\n{fname}\n{sep}\n\n".format(sep="=" * 80, fname=fname)
-                )
-                with open(fname) as infile:
-                    for line in infile:
-                        outfile.write(line)
-
-        trend_url = os.path.join(WWW_BASE_URL, f"trend_throughput_{options}_{slot}.png")
-        request = requests.get(trend_url)
-        if request.status_code != 200:
-            trend_url = None
-
-        with open("index.html", "w") as html_file:
-            html = REPORT_TEMPLATE.render(
-                version=version,
-                platform=platform,
-                hostname=hostname,
-                cpu_info=cpu_info,
-                options=options,
-                throughput=str_tput,
-                WWW_BASE_URL=WWW_BASE_URL,
-                dirname=dirname,
-                trend_url=trend_url,
+        for options, log_files in zip(options, log_files):
+            throughput = sum(get_throughput(f) for f in log_files if "ThroughputTest" in f)
+            str_tput = "{:.1f}".format(throughput)
+            self.saveFloat(
+                "max_throughput",
+                throughput,
+                description="maximum throughput",
+                group="throughput",
             )
-            html_file.write(html)
-            log.debug("Generated HTML report:\n" + html)
-
-        for filename in [
-            os.path.join(directory, "flamy.svg"),
-            os.path.join(directory, "flamy.svg"),
-            os.path.join(directory, "FlameBars.pdf"),
-            os.path.join(directory, "FlameBars.png"),
-            "index.html",
-            "tests.log",
-        ]:
-            publish.upload_eos_www(
-                filename,
-                os.path.join(dirname, os.path.basename(filename)),
+
+            # measure the total bandwidth
+            # only for sprucing for now
+            measure_bandwidth = options in ['Moore_spruce_all_lines']
+            if measure_bandwidth:
+                run_log = os.path.join(directory, "run.log")
+                if not os.path.isfile(run_log):
+                    log.warning('There is no run.log!')
+                    measure_bandwidth = False
+            bandwidth = get_bandwidth(run_log) if measure_bandwidth else 0.
+
+            dirname = (
+                f"Throughput_{version}_{options}_{platform}_{startTime.replace(' ', '_')}"
             )
+            targetRootWebDir = os.path.join(WWW_BASE_URL, dirname)
 
-        self.saveString(
-            "algousage",
-            os.path.join(targetRootWebDir, "flamy.svg"),
-            description="link to algo usage plot",
-            group="performance",
-        )
+            # concatenate log files into one file
+            with open("tests.log", "w") as outfile:
+                for fname in log_files:
+                    outfile.write(
+                        "\n{sep}\n{fname}\n{sep}\n\n".format(sep="=" * 80, fname=fname)
+                    )
+                    with open(fname) as infile:
+                        for line in infile:
+                            outfile.write(line)
+
+            trend_url = os.path.join(WWW_BASE_URL, f"trend_throughput_{options}_{slot}.png")
+            request = requests.get(trend_url)
+            if request.status_code != 200:
+                trend_url = None
+
+            with open("index.html", "w") as html_file:
+                html = REPORT_TEMPLATE.render(
+                    version=version,
+                    platform=platform,
+                    hostname=hostname,
+                    cpu_info=cpu_info,
+                    options=options,
+                    throughput=str_tput,
+                    WWW_BASE_URL=WWW_BASE_URL,
+                    dirname=dirname,
+                    trend_url=trend_url,
+                )
+                html_file.write(html)
+                log.debug("Generated HTML report:\n" + html)
+
+            for filename in [
+                os.path.join(directory, "flamy.svg"),
+                os.path.join(directory, "flamy.svg"),
+                os.path.join(directory, "FlameBars.pdf"),
+                os.path.join(directory, "FlameBars.png"),
+                "index.html",
+                "tests.log",
+            ]:
+                publish.upload_eos_www(
+                    filename,
+                    os.path.join(dirname, os.path.basename(filename)),
+                )
 
-        # send notification on mattermost channel
-        cpu_model = cpu_info.split(" @")[0].replace("(R)", "").replace(" ", "-")
-        mattermost_message = (
-            "The results of latest throughput test "
-            f"[{options} {version} {platform} {cpu_model}]({targetRootWebDir}):\n"
-            f"`Throughput = {str_tput} Events/s`"
-        )
-        if measure_bandwidth:
-            mattermost_message += (
-                f", `Bandwidth = {bandwidth:.1f} MB/s`"
+            self.saveString(
+                "algousage",
+                os.path.join(targetRootWebDir, "flamy.svg"),
+                description="link to algo usage plot",
+                group="performance",
             )
-        publish.post_mattermost(mattermost_message)
-        # let's post a reply to gitlab about the throughput test result
-        if (slot in ["lhcb-master-mr", "lhcb-master-ref", "lhcb-master"]) and (
-            options
-            in [
-                "Moore_hlt1_pp_default",
-                "Moore_hlt2_reco_baseline",
-                "Moore_hlt2_fastest_reco",
-                "Moore_hlt2_pp_thor",
-                "Moore_spruce_all_lines",
-            ]
-        ):
-            # The feedback needs to compare the results from the reference (*-ref or master)
-            # and the -mr builds. We don't know which completes first,
-            # so we must try both cases.
-            # For a better treatment in the future, see LBCORE-1984
-            for ref, test, trigger in dashboard.get_ci_test_pairs(slot, build_id):
-                try:
-                    if test == (slot, build_id):
-                        # The handler runs for the -mr build, so fetch the -ref results
-                        new_throughput = throughput
-                        web_link = targetRootWebDir
-                        new_bandwidth = bandwidth
-                        ref_throughput, ref_web_link = get_couchdb_throughput_link(
-                            ref[0], ref[1], options
-                        )
-                        if measure_bandwidth:
-                            ref_bandwidth = get_couchdb_bandwidth(
+
+            # send notification on mattermost channel
+            cpu_model = cpu_info.split(" @")[0].replace("(R)", "").replace(" ", "-")
+            mattermost_message = (
+                "The results of latest throughput test "
+                f"[{options} {version} {platform} {cpu_model}]({targetRootWebDir}):\n"
+                f"`Throughput = {str_tput} Events/s`"
+            )
+            if measure_bandwidth:
+                mattermost_message += (
+                    f", `Bandwidth = {bandwidth:.1f} MB/s`"
+                )
+            publish.post_mattermost(mattermost_message)
+            # let's post a reply to gitlab about the throughput test result
+            if (slot in ["lhcb-master-mr", "lhcb-master-ref", "lhcb-master"]) and (
+                options
+                in [
+                    "Moore_hlt1_pp_default",
+                    "Moore_hlt2_reco_baseline",
+                    "Moore_hlt2_fastest_reco",
+                    "Moore_hlt2_pp_thor",
+                    "Moore_spruce_all_lines",
+                ]
+            ):
+                # The feedback needs to compare the results from the reference (*-ref or master)
+                # and the -mr builds. We don't know which completes first,
+                # so we must try both cases.
+                # For a better treatment in the future, see LBCORE-1984
+                for ref, test, trigger in dashboard.get_ci_test_pairs(slot, build_id):
+                    try:
+                        if test == (slot, build_id):
+                            # The handler runs for the -mr build, so fetch the -ref results
+                            new_throughput = throughput
+                            web_link = targetRootWebDir
+                            new_bandwidth = bandwidth
+                            ref_throughput, ref_web_link = get_couchdb_throughput_link(
                                 ref[0], ref[1], options
                             )
-                        else: ref_bandwidth = 0.
-                    elif ref == (slot, build_id):
-                        # The handler runs for the -ref build, so fetch the -mr results
-                        ref_throughput = throughput
-                        ref_web_link = targetRootWebDir
-                        ref_bandwidth = bandwidth
-                        new_throughput, web_link = get_couchdb_throughput_link(
-                            test[0], test[1], options
-                        )
-                        if measure_bandwidth:
-                            new_bandwidth = get_couchdb_bandwidth(
+                            if measure_bandwidth:
+                                ref_bandwidth = get_couchdb_bandwidth(
+                                    ref[0], ref[1], options
+                                )
+                            else: ref_bandwidth = 0.
+                        elif ref == (slot, build_id):
+                            # The handler runs for the -ref build, so fetch the -mr results
+                            ref_throughput = throughput
+                            ref_web_link = targetRootWebDir
+                            ref_bandwidth = bandwidth
+                            new_throughput, web_link = get_couchdb_throughput_link(
                                 test[0], test[1], options
                             )
-                        else: new_bandwidth = 0.
+                            if measure_bandwidth:
+                                new_bandwidth = get_couchdb_bandwidth(
+                                    test[0], test[1], options
+                                )
+                            else: new_bandwidth = 0.
+                        else:
+                            assert False
+                    except dashboard.ResourceNotFound:
+                        # The job for the other build hasn't finished yet => do nothing.
+                        # The message will be posted from the other job's handler.
+                        log.warning(
+                            "Could not fetch results for other slot, not posting reply."
+                        )
                     else:
-                        assert False
-                except dashboard.ResourceNotFound:
-                    # The job for the other build hasn't finished yet => do nothing.
-                    # The message will be posted from the other job's handler.
-                    log.warning(
-                        "Could not fetch results for other slot, not posting reply."
-                    )
-                else:
-                    send_gitlab_feedback(
-                        new_throughput,
-                        ref_throughput,
-                        new_bandwidth,
-                        ref_bandwidth,
-                        options,
-                        web_link,
-                        ref_web_link,
-                        trigger,
-                    )
+                        send_gitlab_feedback(
+                            new_throughput,
+                            ref_throughput,
+                            new_bandwidth,
+                            ref_bandwidth,
+                            options,
+                            web_link,
+                            ref_web_link,
+                            trigger,
+                        )
--
GitLab
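
The grouping that the new getOptions helper performs can be exercised on its own. The sketch below copies the helper verbatim from the patch and drives it with two hypothetical log-file names of the form <TestName>_<option>.log; in the real handler, the job's options string and the *.log files found in the test directory are passed in, and the handler then loops over the returned (option, log files) pairs.

# Minimal standalone sketch of the grouping done by getOptions() in this patch.
# The log-file names below are hypothetical examples, not taken from a real job.

def getOptions(options, log_files):
    d = set()
    for f in log_files:
        # "out/ThroughputTest_Moore_hlt1_pp_default.log" -> "Moore_hlt1_pp_default"
        name = f.split('/')[-1].split(".")[0]
        option = "_".join(name.split("_")[1:])
        d.add(option)
    if len(d) == 0:
        # no log files at all: fall back to the single-option behaviour
        return [options], [log_files]
    options = list(d)
    all_log_files = []
    for option in options:
        # keep only the log files whose name contains this option
        all_log_files.append([f for f in log_files if option in f])
    return options, all_log_files


if __name__ == "__main__":
    logs = [
        "out/ThroughputTest_Moore_hlt1_pp_default.log",
        "out/ThroughputTest_Moore_spruce_all_lines.log",
    ]
    opts, grouped = getOptions("Moore_hlt1_pp_default", logs)
    for opt, files in zip(opts, grouped):
        # prints one (option, [log files]) pair per distinct option found in the names
        print(opt, files)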