From 86870d8f9c97c2b8264aab451edb628713101217 Mon Sep 17 00:00:00 2001
From: Arthur Hennequin <arthur.hennequin@cern.ch>
Date: Tue, 6 Jun 2023 10:54:27 +0200
Subject: [PATCH] Handle multiple throughput options in a single job

---
 handlers/ThroughputProfileHandler.py | 290 ++++++++++++++-------------
 1 file changed, 153 insertions(+), 137 deletions(-)

diff --git a/handlers/ThroughputProfileHandler.py b/handlers/ThroughputProfileHandler.py
index 482004ef..6cf92feb 100644
--- a/handlers/ThroughputProfileHandler.py
+++ b/handlers/ThroughputProfileHandler.py
@@ -176,6 +176,19 @@ def send_gitlab_feedback(
         remove_labels=remove_labels,
     )
 
+def getOptions(options, log_files):
+    d = set()
+    for f in log_files:
+        name = f.split('/')[-1].split(".")[0]
+        option = "_".join(name.split("_")[1:])
+        d.add(option)
+    if len(d) == 0:
+        return [options], [log_files]
+    options = list(d)
+    all_log_files = []
+    for option in options:
+        all_log_files.append([f for f in log_files if option in f])
+    return options, all_log_files
 
 class ThroughputProfileHandler(BaseHandler):
     def __init__(self):
@@ -208,151 +221,154 @@ class ThroughputProfileHandler(BaseHandler):
             if f.endswith(".log")
         ]
 
-        throughput = sum(get_throughput(f) for f in log_files if "ThroughputTest" in f)
-        str_tput = "{:.1f}".format(throughput)
-        self.saveFloat(
-            "max_throughput",
-            throughput,
-            description="maximum throughput",
-            group="throughput",
-        )
-
-        # measure the total bandwidth
-        # only for sprucing for now
-        measure_bandwidth = options in ['Moore_spruce_all_lines']
-        if measure_bandwidth:
-            run_log = os.path.join(directory, "run.log")
-            if not os.path.isfile(run_log):
-                log.warning('There is no run.log!')
-                measure_bandwidth = False
-        bandwidth = get_bandwidth(run_log) if measure_bandwidth else 0.
-
-        dirname = (
-            f"Throughput_{version}_{options}_{platform}_{startTime.replace(' ', '_')}"
-        )
-        targetRootWebDir = os.path.join(WWW_BASE_URL, dirname)
+        options, log_files = getOptions(options, log_files)
 
-        # concatenate log files into one file
-        with open("tests.log", "w") as outfile:
-            for fname in log_files:
-                outfile.write(
-                    "\n{sep}\n{fname}\n{sep}\n\n".format(sep="=" * 80, fname=fname)
-                )
-                with open(fname) as infile:
-                    for line in infile:
-                        outfile.write(line)
-
-        trend_url = os.path.join(WWW_BASE_URL, f"trend_throughput_{options}_{slot}.png")
-        request = requests.get(trend_url)
-        if request.status_code != 200:
-            trend_url = None
-
-        with open("index.html", "w") as html_file:
-            html = REPORT_TEMPLATE.render(
-                version=version,
-                platform=platform,
-                hostname=hostname,
-                cpu_info=cpu_info,
-                options=options,
-                throughput=str_tput,
-                WWW_BASE_URL=WWW_BASE_URL,
-                dirname=dirname,
-                trend_url=trend_url,
+        for options, log_files in zip(options, log_files):
+            throughput = sum(get_throughput(f) for f in log_files if "ThroughputTest" in f)
+            str_tput = "{:.1f}".format(throughput)
+            self.saveFloat(
+                "max_throughput",
+                throughput,
+                description="maximum throughput",
+                group="throughput",
             )
-            html_file.write(html)
-            log.debug("Generated HTML report:\n" + html)
-
-        for filename in [
-            os.path.join(directory, "flamy.svg"),
-            os.path.join(directory, "flamy.svg"),
-            os.path.join(directory, "FlameBars.pdf"),
-            os.path.join(directory, "FlameBars.png"),
-            "index.html",
-            "tests.log",
-        ]:
-            publish.upload_eos_www(
-                filename,
-                os.path.join(dirname, os.path.basename(filename)),
+
+            # measure the total bandwidth
+            # only for sprucing for now
+            measure_bandwidth = options in ['Moore_spruce_all_lines']
+            if measure_bandwidth:
+                run_log = os.path.join(directory, "run.log")
+                if not os.path.isfile(run_log):
+                    log.warning('There is no run.log!')
+                    measure_bandwidth = False
+            bandwidth = get_bandwidth(run_log) if measure_bandwidth else 0.
+
+            dirname = (
+                f"Throughput_{version}_{options}_{platform}_{startTime.replace(' ', '_')}"
             )
+            targetRootWebDir = os.path.join(WWW_BASE_URL, dirname)
 
-        self.saveString(
-            "algousage",
-            os.path.join(targetRootWebDir, "flamy.svg"),
-            description="link to algo usage plot",
-            group="performance",
-        )
+            # concatenate log files into one file
+            with open("tests.log", "w") as outfile:
+                for fname in log_files:
+                    outfile.write(
+                        "\n{sep}\n{fname}\n{sep}\n\n".format(sep="=" * 80, fname=fname)
+                    )
+                    with open(fname) as infile:
+                        for line in infile:
+                            outfile.write(line)
+
+            trend_url = os.path.join(WWW_BASE_URL, f"trend_throughput_{options}_{slot}.png")
+            request = requests.get(trend_url)
+            if request.status_code != 200:
+                trend_url = None
+
+            with open("index.html", "w") as html_file:
+                html = REPORT_TEMPLATE.render(
+                    version=version,
+                    platform=platform,
+                    hostname=hostname,
+                    cpu_info=cpu_info,
+                    options=options,
+                    throughput=str_tput,
+                    WWW_BASE_URL=WWW_BASE_URL,
+                    dirname=dirname,
+                    trend_url=trend_url,
+                )
+                html_file.write(html)
+                log.debug("Generated HTML report:\n" + html)
+
+            for filename in [
+                os.path.join(directory, "flamy.svg"),
+                os.path.join(directory, "flamy.svg"),
+                os.path.join(directory, "FlameBars.pdf"),
+                os.path.join(directory, "FlameBars.png"),
+                "index.html",
+                "tests.log",
+            ]:
+                publish.upload_eos_www(
+                    filename,
+                    os.path.join(dirname, os.path.basename(filename)),
+                )
 
-        # send notification on mattermost channel
-        cpu_model = cpu_info.split(" @")[0].replace("(R)", "").replace(" ", "-")
-        mattermost_message = (
-            "The results of latest throughput test "
-            f"[{options} {version} {platform} {cpu_model}]({targetRootWebDir}):\n"
-            f"`Throughput = {str_tput} Events/s`"
-        )
-        if measure_bandwidth:
-            mattermost_message += (
-                f", `Bandwidth = {bandwidth:.1f} MB/s`"
+            self.saveString(
+                "algousage",
+                os.path.join(targetRootWebDir, "flamy.svg"),
+                description="link to algo usage plot",
+                group="performance",
             )
-        publish.post_mattermost(mattermost_message)
-        # let's post a reply to gitlab about the throughput test result
-        if (slot in ["lhcb-master-mr", "lhcb-master-ref", "lhcb-master"]) and (
-            options
-            in [
-                "Moore_hlt1_pp_default",
-                "Moore_hlt2_reco_baseline",
-                "Moore_hlt2_fastest_reco",
-                "Moore_hlt2_pp_thor",
-                "Moore_spruce_all_lines",
-            ]
-        ):
-            # The feedback needs to compare the results from the reference (*-ref or master)
-            # and the -mr builds. We don't know which completes first,
-            # so we must try both cases.
-            # For a better treatment in the future, see LBCORE-1984
-            for ref, test, trigger in dashboard.get_ci_test_pairs(slot, build_id):
-                try:
-                    if test == (slot, build_id):
-                        # The handler runs for the -mr build, so fetch the -ref results
-                        new_throughput = throughput
-                        web_link = targetRootWebDir
-                        new_bandwidth = bandwidth
-                        ref_throughput, ref_web_link = get_couchdb_throughput_link(
-                            ref[0], ref[1], options
-                        )
-                        if measure_bandwidth:
-                            ref_bandwidth = get_couchdb_bandwidth(
+
+            # send notification on mattermost channel
+            cpu_model = cpu_info.split(" @")[0].replace("(R)", "").replace(" ", "-")
+            mattermost_message = (
+                "The results of latest throughput test "
+                f"[{options} {version} {platform} {cpu_model}]({targetRootWebDir}):\n"
+                f"`Throughput = {str_tput} Events/s`"
+            )
+            if measure_bandwidth:
+                mattermost_message += (
+                    f", `Bandwidth = {bandwidth:.1f} MB/s`"
+                )
+            publish.post_mattermost(mattermost_message)
+            # let's post a reply to gitlab about the throughput test result
+            if (slot in ["lhcb-master-mr", "lhcb-master-ref", "lhcb-master"]) and (
+                options
+                in [
+                    "Moore_hlt1_pp_default",
+                    "Moore_hlt2_reco_baseline",
+                    "Moore_hlt2_fastest_reco",
+                    "Moore_hlt2_pp_thor",
+                    "Moore_spruce_all_lines",
+                ]
+            ):
+                # The feedback needs to compare the results from the reference (*-ref or master)
+                # and the -mr builds. We don't know which completes first,
+                # so we must try both cases.
+                # For a better treatment in the future, see LBCORE-1984
+                for ref, test, trigger in dashboard.get_ci_test_pairs(slot, build_id):
+                    try:
+                        if test == (slot, build_id):
+                            # The handler runs for the -mr build, so fetch the -ref results
+                            new_throughput = throughput
+                            web_link = targetRootWebDir
+                            new_bandwidth = bandwidth
+                            ref_throughput, ref_web_link = get_couchdb_throughput_link(
                                 ref[0], ref[1], options
                             )
-                        else: ref_bandwidth = 0.
-                    elif ref == (slot, build_id):
-                        # The handler runs for the -ref build, so fetch the -mr results
-                        ref_throughput = throughput
-                        ref_web_link = targetRootWebDir
-                        ref_bandwidth = bandwidth
-                        new_throughput, web_link = get_couchdb_throughput_link(
-                            test[0], test[1], options
-                        )
-                        if measure_bandwidth:
-                            new_bandwidth = get_couchdb_bandwidth(
+                            if measure_bandwidth:
+                                ref_bandwidth = get_couchdb_bandwidth(
+                                    ref[0], ref[1], options
+                                )
+                            else: ref_bandwidth = 0.
+                        elif ref == (slot, build_id):
+                            # The handler runs for the -ref build, so fetch the -mr results
+                            ref_throughput = throughput
+                            ref_web_link = targetRootWebDir
+                            ref_bandwidth = bandwidth
+                            new_throughput, web_link = get_couchdb_throughput_link(
                                 test[0], test[1], options
                             )
-                        else: new_bandwidth = 0.
+                            if measure_bandwidth:
+                                new_bandwidth = get_couchdb_bandwidth(
+                                    test[0], test[1], options
+                                )
+                            else: new_bandwidth = 0.
+                        else:
+                            assert False
+                    except dashboard.ResourceNotFound:
+                        # The job for the other build hasn't finished yet => do nothing.
+                        # The message will be posted from the other job's handler.
+                        log.warning(
+                            "Could not fetch results for other slot, not posting reply."
+                        )
                     else:
-                        assert False
-                except dashboard.ResourceNotFound:
-                    # The job for the other build hasn't finished yet => do nothing.
-                    # The message will be posted from the other job's handler.
-                    log.warning(
-                        "Could not fetch results for other slot, not posting reply."
-                    )
-                else:
-                    send_gitlab_feedback(
-                        new_throughput,
-                        ref_throughput,
-                        new_bandwidth,
-                        ref_bandwidth,
-                        options,
-                        web_link,
-                        ref_web_link,
-                        trigger,
-                    )
+                        send_gitlab_feedback(
+                            new_throughput,
+                            ref_throughput,
+                            new_bandwidth,
+                            ref_bandwidth,
+                            options,
+                            web_link,
+                            ref_web_link,
+                            trigger,
+                        )
--
GitLab
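
The grouping that the new getOptions helper performs can be exercised on its own. The sketch below copies the helper verbatim from the patch and drives it with two hypothetical log-file names of the form <TestName>_<option>.log; in the real handler, the job's options string and the *.log files found in the test directory are passed in, and the handler then loops over the returned (option, log files) pairs.

# Minimal standalone sketch of the grouping done by getOptions() in this patch.
# The log-file names below are hypothetical examples, not taken from a real job.

def getOptions(options, log_files):
    d = set()
    for f in log_files:
        # "out/ThroughputTest_Moore_hlt1_pp_default.log" -> "Moore_hlt1_pp_default"
        name = f.split('/')[-1].split(".")[0]
        option = "_".join(name.split("_")[1:])
        d.add(option)
    if len(d) == 0:
        # no log files at all: fall back to the single-option behaviour
        return [options], [log_files]
    options = list(d)
    all_log_files = []
    for option in options:
        # keep only the log files whose name contains this option
        all_log_files.append([f for f in log_files if option in f])
    return options, all_log_files


if __name__ == "__main__":
    logs = [
        "out/ThroughputTest_Moore_hlt1_pp_default.log",
        "out/ThroughputTest_Moore_spruce_all_lines.log",
    ]
    opts, grouped = getOptions("Moore_hlt1_pp_default", logs)
    for opt, files in zip(opts, grouped):
        # prints one (option, [log files]) pair per distinct option found in the names
        print(opt, files)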