From c01a556b0678d19a9fe2c1c7a5b3e9cc85bfda30 Mon Sep 17 00:00:00 2001
From: Christoph Hasse <hasse.christoph@outlook.de>
Date: Tue, 21 Jul 2020 13:00:07 +0200
Subject: [PATCH 1/2] handle the fact that a ref build could be required to
 reply to multiple -mr builds

---
 handlers/ThroughputProfileHandler.py | 229 +++++++++++++--------------
 1 file changed, 114 insertions(+), 115 deletions(-)

diff --git a/handlers/ThroughputProfileHandler.py b/handlers/ThroughputProfileHandler.py
index 7dabdb15..87cdc833 100644
--- a/handlers/ThroughputProfileHandler.py
+++ b/handlers/ThroughputProfileHandler.py
@@ -202,70 +202,126 @@ class ThroughputProfileHandler(BaseHandler):
             for l in lines:
                 tmp = throughput_rgx.search(l)
                 if tmp:
+                    logging.info("Got throughput of {} Evts/s for {}/{}".format(float(tmp.group(1)),version,options))
                     return float(tmp.group(1)), test['lhcbpr_url']
             raise Exception("Could not find a throughput number in run_log, thus aborting!")
 
-        try:
-            # let's post a reply to gitlab about the throughput test result
-
-            # workaround ssl cert problems
-            # this environment variable is respected by the requests lirbary
-            os.environ['REQUESTS_CA_BUNDLE'] = "/etc/pki/tls/cert.pem"
-            os.environ['SSL_CERT_FILE'] = "/etc/pki/tls/cert.pem"
-
-            # We start out knowing which slot we are currently in, eg. lhcb-master-ref or lhcb-master-mr
-            # But we don't know if the other slot executed before us or will execute after
-            # since we need both results for the reply we need to handle both cases.
-            # We do so by checking couchdb for the other slot, if it's not there it means
-            # we are running first so we abort as the reply will be generated by the other slot
-            # if one of the two slots failed for whatever reason, no reply will be generated.
-
-            if ("lhcb-master-mr" in version or "lhcb-master-ref" in version) and options in ["Moore_hlt1_pp_default", "Moore_hlt2_reco_baseline"]:
-                from datetime import datetime
-                from LbNightlyTools.Utils import Dashboard
-
-                logging.info("will try to create GitLab reply")
-
-                # ask couchdb for some info on our currently used build slot
-                config = requests.get("https://lhcb-couchdb.cern.ch/nightlies-nightly/"+version)
-                if config.status_code != 200:
-                    raise Exception("Could not get build slot information from couchDB for:" + version )
-
-                # who actually triggered the slot we are running the test for?
-                trigger_source = config.json()['config']['metadata']['ci_test']['trigger']
-
-                if "lhcb-master-ref" in version:
-                    # this is the path that's a bit more cumbersome
-
-                    #first we need to find the lhcb-master-mr slot lauched from the same gitlab ci-test command
-                    # connect to couchdb
-                    dash = Dashboard()
-                    today = datetime.today().strftime('%Y-%m-%d')
-
-                    # get build_id and trigger source for all slots from today
-                    slots = [ (row.doc['config']['build_id'], row.doc['config']['metadata']['ci_test']['trigger'])
-                              for row in dash.db.iterview('summaries/byDay', batch=100, key=today, include_docs=True)
-                              if "lhcb-master-mr" in row.doc['config']['slot']]
-                    # check if any of those slots have the same trigger as our slot
-                    buildID = [s[0] for s in slots if trigger_source == s[1]][0]
-                    # that should be the slot we are looking for
-                    test_version = "lhcb-master-mr.{}".format(buildID)
-
-                    logging.info("Determined test build to be {}".format(test_version))
-
-                    # if we are a ref build, our throughput is the reference
-                    ref_throughput = throughput
+        def send_gitlab_reply(new_throughput, ref_throughput, options, web_link, trigger_source):
+
+            throughput_change = (new_throughput-ref_throughput)/ref_throughput
+
+            if "hlt1" in options:
+                tol = 0.005
+                prefix = "hlt1"
+            else:
+                tol = 0.01
+                prefix = "hlt2"
+
+            label = None
+            thumb = ""
+            if throughput_change > tol:
+                label = prefix + "-throughput-increased"
+                thumb = ":thumbsup:"
+            elif throughput_change < -tol:
+                label = prefix + "-throughput-decreased"
+                thumb = ":thumbsdown:"
+
+            # ok we made it this far, we are ready to talk to GitLab :)
+            message = ("Throughput Test [{opt}]({link}): {throughput} "
+                       "Events/s -- change: {change:.2%} {thumb}").format(opt=options,
+                                                                          throughput=new_throughput,
+                                                                          change=throughput_change,
+                                                                          link=web_link,
+                                                                          thumb=thumb)
+
+            logging.info("Sending GitLab Reply:\n{}".format(message))
+
+            if os.environ.get('GITLAB_TOKEN'):
+                try:
+                    gitlab_server = _gitlabServer()
+                    project = gitlab_server.projects.get(trigger_source['project_id'])
+                    mr = project.mergerequests.get(trigger_source['merge_request_iid'])
+                    discussion = mr.discussions.get(trigger_source['discussion_id'])
+                    # reply to discussion
+                    discussion.notes.create({'body': message})
+                    # add a label to MR (creates a project label if not existing,
+                    # noop if already labeled)
+                    if label:
+                        mr.labels.append(label)
+                        mr.save()
+                except gitlab.GitlabError as e:
+                    # never fail when feedback can't be posted
+                    logging.error('Could not post feedback to gitlab: ' + e.message)
+                    pass
+            else:
+                raise Exception("Can't get GITLAB_TOKEN from environment, thus not posting to GitLab")
+
+
+        # let's post a reply to gitlab about the throughput test result
+
+        # workaround ssl cert problems
+        # this environment variable is respected by the requests library
+        os.environ['REQUESTS_CA_BUNDLE'] = "/etc/pki/tls/cert.pem"
+        os.environ['SSL_CERT_FILE'] = "/etc/pki/tls/cert.pem"
+
+        # We start out knowing which slot we are currently in, e.g. lhcb-master-ref or lhcb-master-mr
+        # But we don't know if the other slot executed before us or will execute after
+        # since we need both results for the reply we need to handle both cases.
+        # We do so by checking couchdb for results of the other slot, if it's not there it means
+        # we are running first so we abort as the reply will be generated by the other slot
+        # if one of the two slots failed for whatever reason, no reply will be generated.
+
+        if ("lhcb-master-mr" in version or "lhcb-master-ref" in version) and options in ["Moore_hlt1_pp_default", "Moore_hlt2_reco_baseline"]:
+            from LbNightlyTools.GitlabUtils import _gitlabServer
+            from LbNightlyTools.Utils import Dashboard
+
+            logging.info("will try to create GitLab reply")
+
+            if "lhcb-master-ref" in version:
+                # this is the path that's a bit more cumbersome as the ref slot can be
+                # reused by multiple -mr slots. Thus we need to get all of these connected
+                # -mr slots and generate replies for them if they have already run.
+
+                # connect to couchdb
+                dash = Dashboard()
+
+                # get (build id, gitlab trigger source) for all builds connected to this ref build
+                slots = [ (row.value[1], row.doc['config']['metadata']['ci_test']['trigger'] )
+                          for row in dash.db.iterview('merge_requests/mr_slots_by_ref_slot',
+                                                      batch=100, include_docs=True,
+                                                      key=["lhcb-master-ref", int(version[16:])])]
+
+                if not slots:
+                    logging.warn("GitLab reply aborted! Found no corresponding -mr builds for {}".format(version))
+
+                for slot in slots:
+                    # assemble test slot id
+                    test_version = "lhcb-master-mr.{}".format(slot[0])
+
                     # get the throughtput for the test build
                     # this call might fail, which means the test build doesn't have any results
                     # most likely because it hasn't run yet. So we raise an exception thus skipping the GitLab reply part.
                     # The reply should then be generated from the test build which apparently runs after us
-                    throughput, web_link = get_throughput_via_couchdb(test_version, options)
+                    try:
+                        logging.info("Try to get throughput and send reply for test build {}".format(test_version))
+                        mr_slot_throughput, web_link = get_throughput_via_couchdb(test_version, options)
+                        send_gitlab_reply(mr_slot_throughput, throughput, options, web_link, slot[1])
+                    except Exception as ex:
+                        logging.warn("GitLab reply of test build {} failed with: {}".format(test_version, ex))
+
+            else:
+                # so this means I'm currently in a lhcb-master-mr build
+
+                try:
+                    # ask couchdb for some info on our currently used build slot
+                    config = requests.get("https://lhcb-couchdb.cern.ch/nightlies-nightly/"+version)
+                    if config.status_code != 200:
+                        raise Exception("Could not get build slot information from couchDB for:" + version )
 
-                else:
-                    # so this means I'm currently in a lhcb-master-mr build
-                    # so let's do the above dance but opposite
+                    # who actually triggered the slot we are running the test for?
+                    trigger = config.json()['config']['metadata']['ci_test']['trigger']
 
                     # finding the other slot is easier if we are in a test slot
                     # extract the corresponding reference (lhcb-master-ref.xyz) build slot and id
@@ -276,66 +332,9 @@ class ThroughputProfileHandler(BaseHandler):
                     # this call might fail, which means the reference build doesn't have any results
                     # most likely because it hasn't run yet. So we raise an exception thus skipping the GitLab reply part.
                     # The reply should then be generated from the ref build which apparently runs after us
-                    ref_throughput, _ = get_throughput_via_couchdb(".".join([ref_slot, str(ref_id)]), options)
-                    web_link = website_url+dirname
-
-
-
-            ####################
-            ## Prepare the actual GitLab reply
-            ####################
-
-            throughput_change = (throughput-ref_throughput)/ref_throughput
+                    ref_build_throughput, _ = get_throughput_via_couchdb(".".join([ref_slot, str(ref_id)]), options)
+                    send_gitlab_reply(throughput, ref_build_throughput, options, website_url+dirname, trigger)
 
-            if "hlt1" in options:
-                tol = 0.005
-                prefix= "hlt1"
-            else:
-                tol = 0.01
-                prefix= "hlt2"
-
-            label = None
-            thumb = ""
-            if throughput_change > tol:
-                label = prefix + "-throughput-increased"
-                thumb = ":thumbsup:"
-            elif throughput_change < -tol :
-                label = prefix + "-throughput-decreased"
-                thumb = ":thumbsdown:"
-
-
-            # ok we made it this far, we are ready to talk to GitLab :)
-            message = "Throughput Test [{opt}]({link}): {throughput} Events/s -- change: {change:.2%} {thumb}".format(opt=options,
-                                                                                                                      throughput=throughput,
-                                                                                                                      change=throughput_change,
-                                                                                                                      link=web_link,
-                                                                                                                      thumb=thumb
-                                                                                                                      )
-
-            logging.info("Sending GitLab Reply:\n{}".format(message))
-
-            if os.environ.get('GITLAB_TOKEN'):
-                try:
-                    from LbNightlyTools.GitlabUtils import _gitlabServer
-                    gitlab_server = _gitlabServer()
-                    project = gitlab_server.projects.get(trigger_source['project_id'])
-                    mr = project.mergerequests.get(trigger_source['merge_request_iid'])
-                    discussion = mr.discussions.get(trigger_source['discussion_id'])
-                    # reply to discussion
-                    discussion.notes.create({'body': message})
-                    # add a label to MR (creates a project label if not existing,
-                    # noop if already labeled)
-                    if label:
-                        mr.labels.append(label)
-                        mr.save()
-                except gitlab.GitlabError as e:
-                    # never fail when feedback can't be posted
-                    logging.error('Could not post feedback to gitlab: ' + e.message)
-                    pass
-            else:
-                raise Exception("Can't get GITLAB_TOKEN from environment, thus not posting to GitLab")
-
-        except Exception as ex:
-            import traceback
-            logging.error('Creating GitLab reply failed: %s', traceback.format_exc())
+                except Exception as ex:
+                    logging.warn("Creating GitLab reply failed: {}".format(ex))
--
GitLab


From d89e8ee7a6934025c1b979d47acb91e929d1ddd8 Mon Sep 17 00:00:00 2001
From: Christoph Hasse <hasse.christoph@outlook.de>
Date: Tue, 21 Jul 2020 13:45:46 +0200
Subject: [PATCH 2/2] run lb-format on ThroughputProfileHandler.py

---
 handlers/ThroughputProfileHandler.py | 221 ++++++++++++++++-----------
 1 file changed, 134 insertions(+), 87 deletions(-)

diff --git a/handlers/ThroughputProfileHandler.py b/handlers/ThroughputProfileHandler.py
index 87cdc833..b8698de1 100644
--- a/handlers/ThroughputProfileHandler.py
+++ b/handlers/ThroughputProfileHandler.py
@@ -10,17 +10,18 @@ from collectRunResults import send_notification_mattermost
 
 
 class ThroughputProfileHandler(BaseHandler):
-
     def __init__(self):
         super(self.__class__, self).__init__()
 
     def collectResultsExt(self, directory, project, version, platform,
-                          hostname, cpu_info, memoryinfo, startTime, endTime, options):
+                          hostname, cpu_info, memoryinfo, startTime, endTime,
+                          options):
 
         # grab the correct files to get the throughput
-        files = [os.path.join(directory, f)
-                 for f in os.listdir(directory)
-                 if 'ThroughputTest' in f]
+        files = [
+            os.path.join(directory, f) for f in os.listdir(directory)
+            if 'ThroughputTest' in f
+        ]
 
         regex = re.compile("Evts\/s = ([\d.]+)")
         throughput = 0
@@ -34,10 +35,11 @@ class ThroughputProfileHandler(BaseHandler):
                 break
 
         str_tput = '{:.1f}'.format(throughput)
-        self.saveFloat("max_throughput",
-                       throughput,
-                       description="maximum throughput",
-                       group="throughput")
+        self.saveFloat(
+            "max_throughput",
+            throughput,
+            description="maximum throughput",
+            group="throughput")
 
         website_url = 'https://cern.ch/lhcbpr-hlt/PerfTests/UpgradeThroughput/'
 
@@ -56,22 +58,16 @@ class ThroughputProfileHandler(BaseHandler):
             + "_"\
             + startTime.replace(' ', '_')
 
-        cpu_model = cpu_info.split(" @")[0].replace("(R)","").replace(" ","-")
-        trend_url = website_url+"trend_throughput_"+str(options)+"_"+str(version).split(".")[0]+".png"
+        cpu_model = cpu_info.split(" @")[0].replace("(R)", "").replace(
+            " ", "-")
+        trend_url = website_url + "trend_throughput_" + str(
+            options) + "_" + str(version).split(".")[0] + ".png"
 
         # concatenate log files into one file
         with open('tests.log', 'w') as outfile:
             for fname in files:
-                outfile.write(" "*80
-                              + "\n"
-                              + "="*80
-                              + "\n"
-                              + fname
-                              + "\n"
-                              + "="*80
-                              + "\n"
-                              + " "*80
-                              + "\n")
+                outfile.write(" " * 80 + "\n" + "=" * 80 + "\n" + fname +
+                              "\n" + "=" * 80 + "\n" + " " * 80 + "\n")
                 with open(fname) as infile:
                     for line in infile:
                         outfile.write(line)
@@ -131,28 +127,33 @@ class ThroughputProfileHandler(BaseHandler):
         targetRootEosDir = os.path.join(wwwDirEos, dirname)
 
         try:
-            subprocess.call(['xrdcp',
-                             os.path.join(directory, 'flamy.svg'),
-                             targetRootEosDir + "/flamy.svg"])
-            subprocess.call(['xrdcp',
-                             os.path.join(directory, 'FlameBars.pdf'),
-                             targetRootEosDir + "/FlameBars.pdf"])
-            subprocess.call(['xrdcp',
-                             os.path.join(directory, 'FlameBars.png'),
-                             targetRootEosDir + "/FlameBars.png"])
-            subprocess.call(['xrdcp',
-                             'index.html',
-                             targetRootEosDir + "/index.html"])
-            subprocess.call(['xrdcp',
-                             'tests.log',
-                             targetRootEosDir + "/tests.log"])
+            subprocess.call([
+                'xrdcp',
+                os.path.join(directory, 'flamy.svg'),
+                targetRootEosDir + "/flamy.svg"
+            ])
+            subprocess.call([
+                'xrdcp',
+                os.path.join(directory, 'FlameBars.pdf'),
+                targetRootEosDir + "/FlameBars.pdf"
+            ])
+            subprocess.call([
+                'xrdcp',
+                os.path.join(directory, 'FlameBars.png'),
+                targetRootEosDir + "/FlameBars.png"
+            ])
+            subprocess.call(
+                ['xrdcp', 'index.html', targetRootEosDir + "/index.html"])
+            subprocess.call(
+                ['xrdcp', 'tests.log', targetRootEosDir + "/tests.log"])
         except Exception as ex:
             logging.warning('Error copying html files to eos: %s', ex)
 
-        self.saveString("algousage",
-                        website_url + dirname + "/flamy.svg",
-                        description="link to algo usage plot",
-                        group="performance")
+        self.saveString(
+            "algousage",
+            website_url + dirname + "/flamy.svg",
+            description="link to algo usage plot",
+            group="performance")
 
         # send notification on mattermost channel
         if "MATTERMOST_HOOK" in os.environ:
@@ -172,29 +173,37 @@ class ThroughputProfileHandler(BaseHandler):
                 + str_tput\
                 + " Events/s`"
             send_notification_mattermost(os.environ['MATTERMOST_HOOK'],
-                                        content)
+                                         content)
         else:
             logging.warning("notifications not sent"
                             " because MATTERMOST_HOOK not set")
 
-
-
-
         def get_throughput_via_couchdb(version, options):
             dash = Dashboard(flavour='periodic')
-            past_tests = [row.doc for row in
-                          dash.db.iterview('nightlies_summaries/by_app_version', batch=10, key=version, include_docs=True)]
+            past_tests = [
+                row.doc for row in dash.db.iterview(
+                    'nightlies_summaries/by_app_version',
+                    batch=10,
+                    key=version,
+                    include_docs=True)
+            ]
 
             if options not in [doc['opt_name'] for doc in past_tests]:
-                raise Exception("Can't find throughput test: {} for slot: {}".format(options, version))
+                raise Exception(
+                    "Can't find throughput test: {} for slot: {}".format(
+                        options, version))
 
             # get the location of the run_log
-            test = [doc for doc in past_tests if options == doc['opt_name']][0]
+            test = [
+                doc for doc in past_tests if options == doc['opt_name']
+            ][0]
             try:
                 # 28: is to get rid of the https prefix stuff since we have eos mounted
                 run_log = test['run_log'][28:]
             except KeyError:
-                logging.info("run_log path not present in test dict, suggesting that this test didn't finish properly")
+                logging.info(
+                    "run_log path not present in test dict, suggesting that this test didn't finish properly"
+                )
 
             throughput_rgx = re.compile("Evts\/s = ([\d.]+)")
             with open(run_log) as log:
@@ -202,48 +211,57 @@ class ThroughputProfileHandler(BaseHandler):
             for l in lines:
                 tmp = throughput_rgx.search(l)
                 if tmp:
-                    logging.info("Got throughput of {} Evts/s for {}/{}".format(float(tmp.group(1)),version,options))
+                    logging.info(
+                        "Got throughput of {} Evts/s for {}/{}".format(
+                            float(tmp.group(1)), version, options))
                     return float(tmp.group(1)), test['lhcbpr_url']
-            raise Exception("Could not find a throughput number in run_log, thus aborting!")
+            raise Exception(
+                "Could not find a throughput number in run_log, thus aborting!"
+            )
 
+        def send_gitlab_reply(new_throughput, ref_throughput, options,
+                              web_link, trigger_source):
 
-        def send_gitlab_reply(new_throughput, ref_throughput, options, web_link, trigger_source):
-
-            throughput_change = (new_throughput-ref_throughput)/ref_throughput
+            throughput_change = (
+                new_throughput - ref_throughput) / ref_throughput
 
             if "hlt1" in options:
                 tol = 0.005
-                prefix= "hlt1"
+                prefix = "hlt1"
             else:
                 tol = 0.01
-                prefix= "hlt2"
+                prefix = "hlt2"
 
             label = None
             thumb = ""
             if throughput_change > tol:
                 label = prefix + "-throughput-increased"
                 thumb = ":thumbsup:"
-            elif throughput_change < -tol :
+            elif throughput_change < -tol:
                 label = prefix + "-throughput-decreased"
                 thumb = ":thumbsdown:"
 
             # ok we made it this far, we are ready to talk to GitLab :)
             message = ("Throughput Test [{opt}]({link}): {throughput} "
-                       "Events/s -- change: {change:.2%} {thumb}").format(opt=options,
-                                                                          throughput=new_throughput,
-                                                                          change=throughput_change,
-                                                                          link=web_link,
-                                                                          thumb=thumb)
+                       "Events/s -- change: {change:.2%} {thumb}").format(
+                           opt=options,
+                           throughput=new_throughput,
+                           change=throughput_change,
+                           link=web_link,
+                           thumb=thumb)
 
             logging.info("Sending GitLab Reply:\n{}".format(message))
 
             if os.environ.get('GITLAB_TOKEN'):
                 try:
                     gitlab_server = _gitlabServer()
-                    project = gitlab_server.projects.get(trigger_source['project_id'])
-                    mr = project.mergerequests.get(trigger_source['merge_request_iid'])
-                    discussion = mr.discussions.get(trigger_source['discussion_id'])
+                    project = gitlab_server.projects.get(
+                        trigger_source['project_id'])
+                    mr = project.mergerequests.get(
+                        trigger_source['merge_request_iid'])
+                    discussion = mr.discussions.get(
+                        trigger_source['discussion_id'])
                     # reply to discussion
                     discussion.notes.create({'body': message})
                     # add a label to MR (creates a project label if not existing,
@@ -253,11 +271,13 @@ class ThroughputProfileHandler(BaseHandler):
                         mr.save()
                 except gitlab.GitlabError as e:
                     # never fail when feedback can't be posted
-                    logging.error('Could not post feedback to gitlab: ' + e.message)
+                    logging.error('Could not post feedback to gitlab: ' +
+                                  e.message)
                     pass
             else:
-                raise Exception("Can't get GITLAB_TOKEN from environment, thus not posting to GitLab")
-
+                raise Exception(
+                    "Can't get GITLAB_TOKEN from environment, thus not posting to GitLab"
+                )
 
         # let's post a reply to gitlab about the throughput test result
@@ -273,7 +293,10 @@ class ThroughputProfileHandler(BaseHandler):
         # we are running first so we abort as the reply will be generated by the other slot
         # if one of the two slots failed for whatever reason, no reply will be generated.
 
-        if ("lhcb-master-mr" in version or "lhcb-master-ref" in version) and options in ["Moore_hlt1_pp_default", "Moore_hlt2_reco_baseline"]:
+        if ("lhcb-master-mr" in version
+                or "lhcb-master-ref" in version) and options in [
+                    "Moore_hlt1_pp_default", "Moore_hlt2_reco_baseline"
+                ]:
             from LbNightlyTools.GitlabUtils import _gitlabServer
             from LbNightlyTools.Utils import Dashboard
 
@@ -288,13 +311,21 @@ class ThroughputProfileHandler(BaseHandler):
                 dash = Dashboard()
 
                 # get (build id, gitlab trigger source) for all builds connected to this ref build
-                slots = [ (row.value[1], row.doc['config']['metadata']['ci_test']['trigger'] )
-                          for row in dash.db.iterview('merge_requests/mr_slots_by_ref_slot',
-                                                      batch=100, include_docs=True,
-                                                      key=["lhcb-master-ref", int(version[16:])])]
+                slots = [
+                    (row.value[1],
+                     row.doc['config']['metadata']['ci_test']['trigger'])
+                    for row in dash.db.iterview(
+                        'merge_requests/mr_slots_by_ref_slot',
+                        batch=100,
+                        include_docs=True,
+                        key=["lhcb-master-ref",
+                             int(version[16:])])
+                ]
 
                 if not slots:
-                    logging.warn("GitLab reply aborted! Found no corresponding -mr builds for {}".format(version))
+                    logging.warn(
+                        "GitLab reply aborted! Found no corresponding -mr builds for {}"
+                        .format(version))
 
                 for slot in slots:
                     # assemble test slot id
@@ -305,36 +336,52 @@ class ThroughputProfileHandler(BaseHandler):
                     # most likely because it hasn't run yet. So we raise an exception thus skipping the GitLab reply part.
                     # The reply should then be generated from the test build which apparently runs after us
                     try:
-                        logging.info("Try to get throughput and send reply for test build {}".format(test_version))
-                        mr_slot_throughput, web_link = get_throughput_via_couchdb(test_version, options)
-                        send_gitlab_reply(mr_slot_throughput, throughput, options, web_link, slot[1])
+                        logging.info(
+                            "Try to get throughput and send reply for test build {}"
+                            .format(test_version))
+                        mr_slot_throughput, web_link = get_throughput_via_couchdb(
+                            test_version, options)
+                        send_gitlab_reply(mr_slot_throughput, throughput,
+                                          options, web_link, slot[1])
                     except Exception as ex:
-                        logging.warn("GitLab reply of test build {} failed with: {}".format(test_version, ex))
+                        logging.warn(
+                            "GitLab reply of test build {} failed with: {}"
+                            .format(test_version, ex))
 
             else:
                 # so this means I'm currently in a lhcb-master-mr build
 
                 try:
                     # ask couchdb for some info on our currently used build slot
-                    config = requests.get("https://lhcb-couchdb.cern.ch/nightlies-nightly/"+version)
+                    config = requests.get(
+                        "https://lhcb-couchdb.cern.ch/nightlies-nightly/" +
+                        version)
                     if config.status_code != 200:
-                        raise Exception("Could not get build slot information from couchDB for:" + version )
+                        raise Exception(
+                            "Could not get build slot information from couchDB for:"
+                            + version)
 
                     # who actually triggered the slot we are running the test for?
-                    trigger = config.json()['config']['metadata']['ci_test']['trigger']
+                    trigger = config.json(
+                    )['config']['metadata']['ci_test']['trigger']
 
                     # finding the other slot is easier if we are in a test slot
                     # extract the corresponding reference (lhcb-master-ref.xyz) build slot and id
-                    ref_slot, ref_id = config.json()['config']['metadata']['ci_test']['reference']
-                    logging.info("Determined ref build to be {}.{}".format(ref_slot,ref_id))
+                    ref_slot, ref_id = config.json(
+                    )['config']['metadata']['ci_test']['reference']
+                    logging.info("Determined ref build to be {}.{}".format(
+                        ref_slot, ref_id))
 
                     # get the throughtput for the reference build
                     # this call might fail, which means the reference build doesn't have any results
                     # most likely because it hasn't run yet. So we raise an exception thus skipping the GitLab reply part.
                     # The reply should then be generated from the ref build which apparently runs after us
-                    ref_build_throughput, _ = get_throughput_via_couchdb(".".join([ref_slot, str(ref_id)]), options)
-                    send_gitlab_reply(throughput, ref_build_throughput, options, website_url+dirname, trigger)
+                    ref_build_throughput, _ = get_throughput_via_couchdb(
+                        ".".join([ref_slot, str(ref_id)]), options)
+                    send_gitlab_reply(throughput, ref_build_throughput,
+                                      options, website_url + dirname,
+                                      trigger)
 
                 except Exception as ex:
-                    logging.warn("Creating GitLab reply failed: {}".format(ex))
-
+                    logging.warn(
+                        "Creating GitLab reply failed: {}".format(ex))
--
GitLab
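
Note on the comparison logic both patches carry: a reply only labels the merge request when the relative throughput change between the -mr build and its -ref build exceeds a tolerance (0.5% for hlt1 options, 1% for hlt2). The standalone sketch below mirrors that decision logic outside the handler; the function name and the sample numbers are illustrative only and appear nowhere in the patches — the real code lives inside collectResultsExt as send_gitlab_reply and additionally posts the message to the GitLab discussion:

    # Illustrative sketch only -- mirrors the thresholds used by send_gitlab_reply.
    def classify_throughput_change(new_throughput, ref_throughput, options):
        # relative change of the -mr build w.r.t. its -ref build
        change = (new_throughput - ref_throughput) / ref_throughput
        # tolerances as set in the patch: 0.5% for hlt1 options, 1% for hlt2
        tol, prefix = (0.005, "hlt1") if "hlt1" in options else (0.01, "hlt2")
        if change > tol:
            return change, prefix + "-throughput-increased", ":thumbsup:"
        if change < -tol:
            return change, prefix + "-throughput-decreased", ":thumbsdown:"
        # within tolerance: no label, no thumb
        return change, None, ""

    # hypothetical numbers, chosen only for the example
    change, label, thumb = classify_throughput_change(
        15500.0, 15000.0, "Moore_hlt1_pp_default")
    print("change: {:.2%}, label: {}, thumb: {}".format(change, label, thumb))
    # -> change: 3.33%, label: hlt1-throughput-increased, thumb: :thumbsup:

The asymmetry between the two branches follows from the slot pairing: an -mr build has exactly one reference recorded in its slot metadata, while a ref build can be reused by several -mr builds, which is why patch 1 has the ref path query the merge_requests/mr_slots_by_ref_slot view and issue one reply per connected -mr build that has already produced results.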