diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 02a3d41ef4ced7453004f8c299144b592e74101c..56b036c96d601a1075b4a289dbc0fd8240ebc04f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,9 +15,6 @@ variables:
   RUN_THROUGHPUT_OPTIONS_HIP: "-n 5000 --events-per-slice 5000 -m 3000 -t 10 -r 1000"
   RUN_THROUGHPUT_OPTIONS_CPU: "-n 100 -m 100 -r 200"
 
-  AVG_THROUGHPUT_DECREASE_THRESHOLD: "-2.5" # (%); fail throughput check if averaged throughput % change falls below -2.5%
-  DEVICE_THROUGHPUT_DECREASE_THRESHOLD: "-7.5" # (%); fail throughput check if single device throughput % change falls below -10.0%
-
   OVERRIDE_CUDA_ARCH_FLAG: "-gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_86,code=sm_86"
 
 stages:
diff --git a/checker/plotting/post_combined_message.py b/checker/plotting/post_combined_message.py
index 45a1c8a612c0569cac8a549f377b15f1fde60218..752403e87a0c87119554375f4a2ea30dd7123e38 100644
--- a/checker/plotting/post_combined_message.py
+++ b/checker/plotting/post_combined_message.py
@@ -12,6 +12,55 @@ from csv_plotter import (
     parse_throughput,
 )
 
+DEVICE_THROUGHPUT_DECREASE_THRESHOLD = -0.075
+AVG_THROUGHPUT_DECREASE_THRESHOLD = -0.025
+# By default weights are 1.0 if not specified.
+# When a weight is less than one for a device, it will contribute
+# correspondingly less to the average and its individual threshold will
+# be relaxed.
+DEVICE_WEIGHTS = {
+    "MI100": 0.5,
+}
+
+
+def check_throughput_change(speedup_wrt_master):
+    problems = []
+    weights = {
+        device: DEVICE_WEIGHTS.get(device, 1.0)
+        for device in speedup_wrt_master
+    }
+
+    # Average throughputs across all devices and complain if the decrease exceeds the threshold
+    assert len(speedup_wrt_master) > 0
+    average_speedup = (sum(speedup * weights[device]
+                           for device, speedup in speedup_wrt_master.items()) /
+                       sum(weights.values()))
+    change = average_speedup - 1.0
+    print(f"Device-averaged speedup: {average_speedup}")
+    print(f"               % change: {change*100}")
+    tput_tol = AVG_THROUGHPUT_DECREASE_THRESHOLD
+    if change < tput_tol:
+        msg = (
+            f" :warning: :eyes: **average** throughput change {change*100}% "
+            + f"_exceeds_ {abs(tput_tol)*100} % threshold")
+        print(msg)
+        problems.append(msg)
+
+    # single device throughput decrease check
+    for device, speedup in speedup_wrt_master.items():
+        change = speedup - 1.0
+        tput_tol = DEVICE_THROUGHPUT_DECREASE_THRESHOLD / weights[device]
+        print(f"{device} speedup: {speedup}")
+        print(f"{device} % change: {change*100}")
+        if change < tput_tol:
+            msg = (
+                f":warning: :eyes: **{device}** throughput change {change*100}% "
+                + f"_exceeds_ {abs(tput_tol)*100}% threshold")
+            print(msg)
+            problems.append(msg)
+
+    return problems
+
 
 def main():
     """
@@ -19,13 +68,15 @@ def main():
     """
     usage = (
         "%prog [options] <-t throughput_data_file> <-b throughput_breakdown_data_file>\n"
-        + 'Example: %prog -t throughput_data.csv -b throughput_breakdown.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"'
+        +
+        'Example: %prog -t throughput_data.csv -b throughput_breakdown.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"'
     )
     parser = OptionParser(usage=usage)
     parser.add_option(
         "-m",
         "--mattermost_url",
-        default=os.environ["MATTERMOST_KEY"] if "MATTERMOST_KEY" in os.environ else "",
+        default=os.environ["MATTERMOST_KEY"]
+        if "MATTERMOST_KEY" in os.environ else "",
         dest="mattermost_url",
         help="The url where to post outputs generated for mattermost",
     )
@@ -48,23 +99,8 @@ def main():
         default="",
         help="Title for your graph. (default: empty string)",
     )
-
-    parser.add_option("-j", "--job", dest="job", default="", help="Name of CI job")
-    parser.add_option(
-        "--allowed-average-decrease",
-        dest="min_avg_tput_change",
-        default=-2.5,
-        help="Max tolerated average throughput decrease (%).",
-    )
-
-    parser.add_option(
-        "--allowed-single-decrease",
-        dest="min_single_tput_change",
-        default=-5.0,
-        help="Max tolerated single-device throughput decrease (%).",
-    )
-
+    parser.add_option(
+        "-j", "--job", dest="job", default="", help="Name of CI job")
     (options, args) = parser.parse_args()
 
     if options.mattermost_url == "":
@@ -78,51 +114,13 @@ def main():
         breakdown = parse_throughput(csvfile.read(), scale=1)
 
     master_throughput = get_master_throughput(
-        options.job, csvfile=options.throughput, scale=1e-3
-    )
-
+        options.job, csvfile=options.throughput, scale=1e-3)
     speedup_wrt_master = {
-        a: throughput.get(a, b) / b for a, b in master_throughput.items()
+        a: throughput.get(a, b) / b
+        for a, b in master_throughput.items()
     }
 
-    # Average throughputs across all devices and complain if we are above decr % threshold
-    avg_throughput_decr = False
-    single_throughput_decr = False
-    extra_messages = ""
-
-    n_dev = len(speedup_wrt_master.values())
-    if n_dev > 0:
-        average_speedup = sum(speedup_wrt_master.values()) / n_dev
-        change = (average_speedup - 1.0) * 100.0
-
-        print(f"Device-averaged speedup: {average_speedup}")
-        print(f"               % change: {change}")
-
-        extra_messages = f"*Device-averaged speedup (% change):* {average_speedup:.2f} ({change:.2f} %)"
-
-        tput_tol = float(options.min_avg_tput_change)
-
-        if change < tput_tol:
-            print("*** Average throughput decrease above threshold.")
-            extra_messages += f" :warning: :eyes: decrease _exceeds_ {abs(float(tput_tol))} % threshold\n"
-            avg_throughput_decr = True
-    else:
-        print("No throughput reference available")
-        extra_messages = f":warning: No reference available for comparison."
-
-    # single device throughput decrease check
-    extra_messages += "\n" if len(extra_messages) > 0 else ""
-    tput_tol = float(options.min_single_tput_change)
-
-    for device, speedup in speedup_wrt_master.items():
-        change = (speedup - 1.0) * 100.0
-        print(f"{device} speedup: {speedup}")
-        print(f"{device} % change: {change}")
-
-        if change < tput_tol:
-            print(f"*** {device} Single-device throughput decrease above threshold.")
-            extra_messages += f":warning: :eyes: **{device}** throughput decrease _exceeds_ {abs(float(tput_tol))} % threshold\n"
-            avg_throughput_decr = True
+    problems = check_throughput_change(speedup_wrt_master)
 
     throughput_text = produce_plot(
         throughput,
@@ -133,16 +131,20 @@ def main():
     )
     breakdown_text = produce_plot(breakdown, unit="%", print_text=True)
 
-    text = f"{options.title}:\n```\n{throughput_text}```\n{extra_messages}\n\nBreakdown of sequence:\n```\n{breakdown_text}```"
-
     if options.mattermost_url is not None:
+        extra_message = "\n".join(problems)
+        text = f"""{options.title}:
+```
+{throughput_text}```
+{extra_message}
+
+Breakdown of sequence:
+```
+{breakdown_text}```"""
         send_to_mattermost(text, options.mattermost_url)
 
-    if avg_throughput_decr:
-        sys.exit(5)
-
-    if single_throughput_decr:
-        sys.exit(6)
+    if problems:
+        sys.exit(7)
 
 
 if __name__ == "__main__":
diff --git a/checker/plotting/post_telegraf.py b/checker/plotting/post_telegraf.py
index 4ec00fa1cd4ac5d27934c5d32bb8decbae175262..85775c8bf332bbd00b50e7314dcaf179f40f25b2 100755
--- a/checker/plotting/post_telegraf.py
+++ b/checker/plotting/post_telegraf.py
@@ -35,9 +35,9 @@ def send_to_telegraf(throughput, device, options):
         print("Sending telegraf string: %s" % telegraf_string)
         response = session.post(options.telegraf_url, data=telegraf_string)
         print("http response: %s" % response.headers)
-    except:
+    except Exception as e:
         print("Failed to submit data string %s" % telegraf_string)
-        print(traceback.format_exc())
+        print(str(e))
 
 
 """
diff --git a/checker/plotting/speedup_cli_plot.py b/checker/plotting/speedup_cli_plot.py
deleted file mode 100755
index 1b6d966411a888170a341eaea44ba390b585354b..0000000000000000000000000000000000000000
--- a/checker/plotting/speedup_cli_plot.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/python3
-###############################################################################
-# (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration      #
-###############################################################################
-import csv
-import subprocess
-import os
-import re
-
-#######################################################################
-# From termgraph
-# https://github.com/mkaz/termgraph/
-#######################################################################
-
-tg_width = 50
-tg_format = '{:<4.2f}'
-DELIM = ','
-TICK = '▇'
-SM_TICK = '▏'
-
-
-def find_max_label_length(labels):
-    """Return the maximum length for the labels."""
-    length = 0
-    for i in range(len(labels)):
-        if len(labels[i]) > length:
-            length = len(labels[i])
-
-    return length
-
-
-def normalize(data, width):
-    """Normalize the data and return it."""
-    # min_dat = find_min(data)
-    min_dat = data[-1]
-    # We offset by the minimum if there's a negative.
- off_data = [] - if min_dat < 0: - min_dat = abs(min_dat) - for dat in data: - off_data.append([_d + min_dat for _d in dat]) - else: - off_data = data - # min_dat = find_min(off_data) - # max_dat = find_max(off_data) - min_dat = off_data[-1] - max_dat = off_data[0] - - if max_dat < width: - # Don't need to normalize if the max value - # is less than the width we allow. - return off_data - - # max_dat / width is the value for a single tick. norm_factor is the - # inverse of this value - # If you divide a number to the value of single tick, you will find how - # many ticks it does contain basically. - norm_factor = width / float(max_dat) - normal_dat = [] - for dat in off_data: - normal_dat.append([_v * norm_factor for _v in dat]) - - return normal_dat - - -def horiz_rows(labels, data, normal_dat): - global final_msg - """Prepare the horizontal graph. - Each row is printed through the print_row function.""" - # val_min = find_min(data) - val_min = data[-1] - - for i in range(len(labels)): - label = "{:<{x}}: ".format(labels[i], x=find_max_label_length(labels)) - - values = data[i] - num_blocks = normal_dat[i] - - for j in range(1): - # In Multiple series graph 1st category has label at the beginning, - # whereas the rest categories have only spaces. - if j > 0: - len_label = len(label) - label = ' ' * len_label - tail = ' {}'.format(tg_format.format(values)) - color = None - # print(label, end="") - final_msg += label - yield (values, int(num_blocks), val_min, color) - final_msg += tail + 'x\n' - - -# Prints a row of the horizontal graph. -def print_row(value, num_blocks, val_min, colors): - global final_msg - """A method to print a row for a horizontal graphs. - - i.e: - 1: ▇▇ 2 - 2: ▇▇▇ 3 - 3: ▇▇▇▇ 4 - """ - - if num_blocks < 1 and (value > val_min or value > 0): - # Print something if it's not the smallest - # and the normal value is less than one. 
- # sys.stdout.write(SM_TICK) - # print(SM_TICK, end="") - final_msg += SM_TICK - else: - for _ in range(num_blocks): - # sys.stdout.write(TICK) - # print(TICK, end="") - final_msg += TICK - - -def chart(data, labels): - # One category/Multiple series graph with same scale - # All-together normalization - normal_dat = normalize(data, tg_width) - for row in horiz_rows(labels, data, normal_dat): - print_row(*row) - - -####################################################################### -# Finish termgraph -####################################################################### - -import traceback -from optparse import OptionParser -from termgraph import TermGraph - - -def format_text(title, algorithm_times, options): - # Prepare data - final_vals = [] - final_tags = [] - - keylist = sorted(algorithm_times.keys(), - key=lambda x: algorithm_times[x], - reverse=True) - for k in keylist: - val = algorithm_times[k] - final_tags.append(k) - final_vals.append(val) - - # Plot - print(final_tags) - print(final_vals) - tg = TermGraph(suffix=options.unit, x_max=options.x_max) - final_msg = tg.chart(final_vals, final_tags) - - text = '{"text": "%s\n```\n%s```"}' % (title, final_msg) - return text - - -def send_to_mattermost(text, mattermost_url): - subprocess.call([ - "curl", "-i", "-X", "POST", "-H", 'Content-Type: application/json', - "-d", text, mattermost_url - ]) - - -""" -Produces a plot of the performance breakdown of the sequence under execution -""" - - -def main(): - usage = '%prog [options] <-d data_file>\n' + \ - 'Example: %prog -d data.csv -m "http://{your-mattermost-site}/hooks/xxx-generatedkey-xxx"' - parser = OptionParser(usage=usage) - parser.add_option( - '-m', - '--mattermost_url', - dest='mattermost_url', - help='The url where to post outputs generated for mattermost') - parser.add_option('-d', - '--data_file', - dest='data_file', - help='Path to a data file to plot') - parser.add_option( - '-u', - '--unit', - dest='unit', - default='', - help='A unit suffix to append to evey value. Default is an empty string' - ) - parser.add_option( - '-x', - '--x_max', - dest='x_max', - default=50, - help='Graph X axis is at least this many units wide. (default=50)') - parser.add_option('-t', - '--title', - dest='title', - default='', - help='Title for your graph. (default: empty string)') - (options, args) = parser.parse_args() - - if options.data_file is None: - parser.print_help() - - try: - options.x_max = float(options.x_max) - except: - parser.print_help() - print('\n-x has to be a convertible floating point value!\n') - return -1 - - algorithm_times = {} - with open(options.data_file) as csvfile: - csv_reader = csv.reader(csvfile, delimiter=',') - for row in csv_reader: - try: - algorithm_times[row[0]] = float(row[1]) - except: - print(traceback.format_exc()) - - # Convert throughputs to speedups - base_speed = min(algorithm_times.values()) - for k in algorithm_times.keys(): - algorithm_times[k] /= base_speed - - text = format_text(options.title, algorithm_times, options) - print(text) - if options.mattermost_url is not None: - send_to_mattermost(text, options.mattermost_url) - - -if __name__ == "__main__": - main() diff --git a/checker/plotting/speedup_sample.dat b/checker/plotting/speedup_sample.dat deleted file mode 100644 index 688ec80bec741af64355e6131b3441f042d2db9d..0000000000000000000000000000000000000000 --- a/checker/plotting/speedup_sample.dat +++ /dev/null @@ -1,8 +0,0 @@ -!!SAMPLE!! GTX 1060 6GB,7519.354825 -!!SAMPLE!! GTX 1080 Ti,18982.238479 -!!SAMPLE!! 
GTX 670,2297.042902 -!!SAMPLE!! GTX 680,2524.368385 -!!SAMPLE!! GTX TITAN X,11117.198792 -!!SAMPLE!! RTX 2080 Ti,43705.972936 -!!SAMPLE!! T4,22077.601157 -!!SAMPLE!! V100-PCIE-32GB,47568.303480 diff --git a/checker/plotting/termgraph.py b/checker/plotting/termgraph.py index 377f62e24be79c5e0cd4778e66fd860932416426..91be19dd01a6cc55e816a0b46a7ff0452ab93ed5 100755 --- a/checker/plotting/termgraph.py +++ b/checker/plotting/termgraph.py @@ -1,18 +1,43 @@ #!/usr/bin/python3 -############################################################################### -# (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration # -############################################################################### - -import math - ####################################################################### +# MIT License +# +# Copyright (c) 2018 Marcus Kazmierczak +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE +# # From termgraph # https://github.com/mkaz/termgraph/ ####################################################################### +import math + class TermGraph: - def __init__(self, tg_width=50, tg_format='{:<4.2f}', delim=',', tick='â–ˆ', sm_tick='â–Œ', suffix="", x_max = 50): + def __init__(self, + tg_width=50, + tg_format='{:<4.2f}', + delim=',', + tick='â–ˆ', + sm_tick='â–Œ', + suffix="", + x_max=50): self.tg_width = tg_width self.tg_format = tg_format self.DELIM = delim @@ -25,7 +50,7 @@ class TermGraph: self.small_tick = "â”´" self.horiz = "─" self.label_length = 0 - + def find_max_label_length(self, labels): """Return the maximum length for the labels.""" length = 0 @@ -34,7 +59,7 @@ class TermGraph: length = len(labels[i]) self.label_length = length - return length # Yes, yes, i know ... + return length # Yes, yes, i know ... 
def getScale(self, data): # min_dat = find_min(data) @@ -48,25 +73,25 @@ class TermGraph: #minimum -= epsilon rr = self.max_dat - self.min_dat stepCount = 10 - roughStep = rr / (stepCount -1) + roughStep = rr / (stepCount - 1) goodNormalizedSteps = [1, 2, 5, 10] stepPower = math.pow(10, -math.floor(math.log10(abs(roughStep)))) normalizedStep = roughStep * stepPower - goodNormalizedStep = list(filter(lambda x: x > normalizedStep, goodNormalizedSteps))[0] + goodNormalizedStep = list( + filter(lambda x: x > normalizedStep, goodNormalizedSteps))[0] self.step = int(goodNormalizedStep / stepPower) self.scaleMax = int(math.ceil(self.max_dat / self.step) * self.step) - self.scaleMin = int(math.floor(self.min_dat / self.step) * self.step) - self.strlen = max(len(str(int(self.scaleMin))), len(str(int(self.scaleMax)))) - print(self.strlen) + self.scaleMin = int(math.floor(self.min_dat / self.step) * self.step) + self.strlen = max( + len(str(int(self.scaleMin))), len(str(int(self.scaleMax)))) self.nSteps = int((self.scaleMax - self.scaleMin) / self.step) - print(self.scaleMin, self.scaleMax, self.step, self.nSteps) - self.tick_dist = int(self.tg_width / (self.scaleMax - self.scaleMin) * self.step / 2) - print(self.tick_dist) - + self.tick_dist = int( + self.tg_width / (self.scaleMax - self.scaleMin) * self.step / 2) + self.tg_width = int(self.tick_dist * 2 * self.nSteps) - print('Updating tg_width to: %d' % self.tg_width) + # print('Updating tg_width to: %d' % self.tg_width) return def numLen(self, num): @@ -84,17 +109,18 @@ class TermGraph: self.text += self.big_tick self.text += "\n" - + l = self.numLen(self.scaleMin) - l = int(l/2) - self.text += " " * (self.label_length - l - self.tick_dist + 2) - for i in range(self.scaleMin, self.scaleMax + self.step, self.step): - self.text += '{:^{width}}'.format(str(i), width = '%d' % (self.tick_dist * 2)) + l = int(l / 2) + self.text += " " * (self.label_length - l - self.tick_dist + 2) + for i in range(self.scaleMin, self.scaleMax + self.step, self.step): + self.text += '{:^{width}}'.format( + str(i), width='%d' % (self.tick_dist * 2)) self.text += "\n" def normalize(self, data, width): """Normalize the data and return it.""" - + # We offset by the minimum if there's a negative. off_data = [] if self.min_dat < 0: @@ -106,22 +132,21 @@ class TermGraph: #self.max_dat += abs(self.min_dat) #if self.max_dat < self.x_max: - # Don't need to normalize if the max value - # is less than the width we allow. - #return off_data + # Don't need to normalize if the max value + # is less than the width we allow. + #return off_data # self.max_dat = self.x_max # max_dat / width is the value for a single tick. norm_factor is the # inverse of this value # If you divide a number to the value of single tick, you will find how # many ticks it does contain basically. 
- print('width: %d, max_dat: %f' % (width, self.scaleMax)) + # print('width: %d, max_dat: %f' % (width, self.scaleMax)) norm_factor = width / float(self.scaleMax) normal_dat = [] for dat in off_data: normal_dat.append(dat * norm_factor) - return normal_dat def horiz_rows(self, labels, data, normal_dat): @@ -130,7 +155,8 @@ class TermGraph: val_min = min(data) for i in range(len(labels)): - label = "{:<{x}} │".format(labels[i], x=self.find_max_label_length(labels)) + label = "{:<{x}} │".format( + labels[i], x=self.find_max_label_length(labels)) values = data[i] num_blocks = normal_dat[i] @@ -141,18 +167,19 @@ class TermGraph: if j > 0: len_label = len(label) label = ' ' * len_label - tail = ' {} %s'.format(self.tg_format.format(values)) % self.suffix + tail = ' {} %s'.format( + self.tg_format.format(values)) % self.suffix color = None # print(label, end="") self.text += label - yield(values, int(num_blocks), val_min, color) + yield (values, int(num_blocks), val_min, color) self.text += tail + '\n' # Prints a row of the horizontal graph. def print_row(self, value, num_blocks, val_min, color): """A method to print a row for a horizontal graphs. - + i.e: 1: ▇▇ 2 2: ▇▇▇ 3 @@ -171,13 +198,13 @@ class TermGraph: # print(TICK, end="") self.text += self.TICK - for _ in range(max([num_blocks,1]), self.tg_width): + for _ in range(max([num_blocks, 1]), self.tg_width): self.text += ' ' def chart(self, data, labels): # One category/Multiple series graph with same scale # All-together normalization - self.text="" + self.text = "" self.getScale(data) normal_dat = self.normalize(data, self.tg_width) for row in self.horiz_rows(labels, data, normal_dat): @@ -203,4 +230,3 @@ def main(): if __name__ == '__main__': main() - diff --git a/scripts/allen_run.sh b/scripts/allen_run.sh deleted file mode 100755 index 3a67e83494ea40fe74a93183512c0efeb5da64ef..0000000000000000000000000000000000000000 --- a/scripts/allen_run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -############################################################################### -# (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration # -############################################################################### - -./mpirun_script.sh mlx5_0 mlx5_1 diff --git a/scripts/allen_run_2.sh b/scripts/allen_run_2.sh deleted file mode 100755 index a870741176dd4e5bc00aa75f3fd5980c050f19f0..0000000000000000000000000000000000000000 --- a/scripts/allen_run_2.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -############################################################################### -# (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration # -############################################################################### - -./mpirun_script_2.sh mlx5_1 mlx5_0 diff --git a/scripts/allen_run_3.sh b/scripts/allen_run_3.sh deleted file mode 100755 index ce363a9179fe94d5955a60373b298318f861c582..0000000000000000000000000000000000000000 --- a/scripts/allen_run_3.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -############################################################################### -# (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration # -############################################################################### - -./mpirun_script_3.sh mlx5_0 mlx5_1 diff --git a/scripts/ci/jobs/publish_throughput.sh b/scripts/ci/jobs/publish_throughput.sh index 33d435bcdf06bf721a1f1d369cf6dced4be34446..0510abc19c3fb1bbed2c198134b856172ea59962 100755 --- a/scripts/ci/jobs/publish_throughput.sh +++ 
b/scripts/ci/jobs/publish_throughput.sh
@@ -3,8 +3,7 @@
 # (c) Copyright 2018-2020 CERN for the benefit of the LHCb Collaboration      #
 ###############################################################################
 
-set -uo pipefail
-set +xe
+set -euxo pipefail
 
 setupViews
 
@@ -42,29 +41,30 @@ for SEQUENCE_DATASET in $(ls -1 | grep "run_throughput" | grep -Ei "run_throughp
     BUILDOPTIONS_DISPLAY=${BUILDOPTIONS}
   fi
 
+  RC=0
   python checker/plotting/post_combined_message.py \
     -j "${CI_JOB_NAME}" \
    -l "Throughput of [branch **\`${CI_COMMIT_REF_NAME} (${CI_COMMIT_SHORT_SHA})\`**, sequence **\`${SEQUENCE}\`** over dataset **\`${INPUT_FILES}\`** build options \`${BUILDOPTIONS_DISPLAY}\`](https://gitlab.cern.ch/lhcb/Allen/pipelines/${CI_PIPELINE_ID})" \
     -t devices_throughputs_${SEQUENCE_DATASET}.csv \
     -b run_throughput_output_${SEQUENCE_DATASET}/${BREAKDOWN_DEVICE_ID}/algo_breakdown.csv \
-    --allowed-average-decrease "${AVG_THROUGHPUT_DECREASE_THRESHOLD}" \
-    --allowed-single-decrease "${DEVICE_THROUGHPUT_DECREASE_THRESHOLD}" # (%)
-  RC=$?
+    || RC=$?
 
-  python checker/plotting/post_telegraf.py -f devices_throughputs_${SEQUENCE_DATASET}.csv . -s "${SEQUENCE}" -b "${CI_COMMIT_REF_NAME}" -d "${INPUT_FILES}" -o "${BUILDOPTIONS}"
-
-  if [ "$RC" = "5" ]; then
-    THROUGHPUT_ALARM=1
-    THROUGHPUT_MESSAGES="${THROUGHPUT_MESSAGES}
-*** sequence ${SEQUENCE} over dataset ${INPUT_FILES} - Device-averaged throughput change is less than ${AVG_THROUGHPUT_DECREASE_THRESHOLD} %"
-  elif [ "$RC" = "6" ]; then
+  if [ "$RC" = "7" ]; then
     THROUGHPUT_ALARM=1
     THROUGHPUT_MESSAGES="${THROUGHPUT_MESSAGES}
-*** sequence ${SEQUENCE} over dataset ${INPUT_FILES} - Single-device throughput change, for at least one device, is less than ${DEVICE_THROUGHPUT_DECREASE_THRESHOLD} %"
+FAIL: throughput decreased too much for sequence ${SEQUENCE} over dataset ${INPUT_FILES}"
+  elif [ "$RC" != "0" ]; then
+    echo "FAIL: post_combined_message.py script failed"
+    exit 1
   fi
+
+  python checker/plotting/post_telegraf.py \
+    -f devices_throughputs_${SEQUENCE_DATASET}.csv . \
+    -s "${SEQUENCE}" -b "${CI_COMMIT_REF_NAME}" -d "${INPUT_FILES}" -o "${BUILDOPTIONS}" \
+    || echo "WARNING: failed to post to telegraf"
+
   echo ""
   echo ""
-
 done
 
 if [ "${THROUGHPUT_ALARM}" = "1" ]; then