From a8513b9fb4878e9ed4cab732a467b1b15758219c Mon Sep 17 00:00:00 2001 From: Frank Siegert <frank.siegert@cern.ch> Date: Thu, 6 Feb 2020 10:29:26 +0100 Subject: [PATCH 1/2] Remove obsolete/useless parts of logParser In particular the "find ." JO discovery was super slow in large trees on networked file systems. --- scripts/logParser.py | 78 -------------------------------------------- 1 file changed, 78 deletions(-) diff --git a/scripts/logParser.py b/scripts/logParser.py index 15fe5a32c1..b8fdf3b976 100644 --- a/scripts/logParser.py +++ b/scripts/logParser.py @@ -2,7 +2,6 @@ import optparse, sys, math, subprocess, os from collections import OrderedDict -from csv import writer parser = optparse.OptionParser(usage=__doc__) parser.add_option("-i", "--input", default="-", dest="INPUT_FILE", metavar="PATH", help="input logfile") @@ -11,8 +10,6 @@ parser.add_option("-t", "--test", default=False, dest="TEST", action="store_true parser.add_option("-u", "--nocpu", default=False, dest="SKIPCPU", action="store_true", help="Ignore CPU timing information.") parser.add_option("-m", "--mcver", dest="MC_VER", default="mc", help="Specify MCXX campaign") parser.add_option("-c", "--nocolour", action="store_true", dest="NO_COLOUR", default=False, help="Turn off colour for copying to file") -parser.add_option("-x", "--csv", action="store", dest="OUTPUT_CSV", default="test.csv", help="Output csv file") -parser.add_option("-n", "--nocsv", action="store_true", dest="DISABLE_CSV", default=True, help="Do not store csv file") parser.add_option("-s", "--standalone", action="store_true", dest="STANDALONE", default=False, help="Run based on cvmfs location of files (stand-alone, no mcjoboptions locally)") opts, fileargs = parser.parse_args() @@ -223,57 +220,6 @@ def main(): print("jobOptions and release:") print("---------------------") - #Checking jobOptions - JOsList=getJOsList(JOsDict) - if not len(JOsList): - JOsErrors.append("including file \""+MCXX) - else: - if not len(JOsDict["including file \""+MCXX]): - JOsErrors.append("including file \""+MCXX) - - DSIDxxx='' - topJO='' - nTopJO=0 - loginfo( '- jobOptions =',"") - for jo in JOsList: - gitexcomm="find -L "+location+" -type f -name "+jo - retcode = subprocess.Popen(gitexcomm, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE).communicate() - gitJOs=retcode[0].decode() - if not gitJOs: - raise RuntimeError("ERROR: {0} jobOptions not found in git!".format(jo)) - pieces=gitJOs.split('/') - try: - if len(pieces) and pieces[-1].startswith(MCXX): - ##This is top JO - nTopJO=nTopJO+1 - topJO=jo - DSID=pieces[-2] - DSIDxxx=pieces[-3] - except: - raise RuntimeError("Unknown format of jO file: {0}\nShould look something like ./421xxx/421001/mc.Sherpa_Example1.py".format(gitJOs)) - - if nTopJO!=1: - logerr( "","ERROR: !=1 (%i) \"top\" JO files found!"%nTopJO) - raise RuntimeError("!= 1 \"top\" JO file found") - else: - for jo in JOsList: - if jo == topJO: - if os.path.exists(location+"/{0}/{1}/{2}".format(DSIDxxx,DSID,jo)): - loggood("",jo) - else: - logerr("",jo+" ERROR <-- jobOptions not found in git!") - else: - indices = [i for i,s in enumerate(gitJOs) if "/"+jo in s] - if len(indices)==1: - loggood("",jo) - else: - if not len(indices): - logerr("",jo+" - jobOptions not found in git!") - else: - logwarn("",jo+" - multiple instances of jobOptions not found in git!") - for ix in indices: - logwarn("",jo+" --> %s"%gitJOs[ix]) - #Checking release release="not found" if not len(JOsDict['using release']): @@ -595,30 +541,6 @@ def main(): else: logerr("Errors : "+str(LogCounts.Errors)+" , Warnings : "+str(LogCounts.Warnings)," -> Errors encountered! Not ready for production!") print("") - - - #Write csv file output - cols=['Brief desciption','JobOptions','CoM energy [GeV]','Events (Evgen-only)','Events (FullSim)','Events (Atlfast II)','Priority','Output formats','Cross section [pb]','Effective luminosity [fb-1]','Filter efficiency','Evgen CPU time/job [hr]','Input files','MC-tag','Release','Comments','Evgen tag','Simul tag','Merge tag','Digi tag','Reco tag','Rec Merge tag','Atlfast tag','Atlf Merge tag'] - row=[] - row=pad(row,24,"") - row[cols.index('JobOptions')]=topJO - #row[cols.index('CoM energy [GeV]')]=13000. - #row[cols.index('Events (Evgen-only)')]=opts.TOTAL_EVENTS - if opts.TOTAL_EVENTS: - row[cols.index('Events (Evgen-only)')]=str(opts.TOTAL_EVENTS)+" (CHECK MANUALLY)" - row[cols.index('Cross section [pb]')]=xs_nb*1000. - if opts.TOTAL_EVENTS: - row[cols.index('Effective luminosity [fb-1]')]=eff_lumi_fb - row[cols.index('Filter efficiency')]=filt_eff - row[cols.index('Evgen CPU time/job [hr]')]=cpuPerJob - row[cols.index('Release')]=release - #for n,c in enumerate(cols): - # print c,row[n] - if not opts.DISABLE_CSV: - outCSV=open(opts.OUTPUT_CSV,'w') - outCSVwriter=writer(outCSV) - outCSVwriter.writerow(row) - outCSV.close() return -- GitLab From 8f63f3b511ad684a438107671aaa113de6aa4c9c Mon Sep 17 00:00:00 2001 From: Frank Siegert <frank.siegert@cern.ch> Date: Thu, 6 Feb 2020 10:35:27 +0100 Subject: [PATCH 2/2] Keep JOsList defined as it's used later --- scripts/logParser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/logParser.py b/scripts/logParser.py index b8fdf3b976..0aa74bf3c6 100644 --- a/scripts/logParser.py +++ b/scripts/logParser.py @@ -220,6 +220,14 @@ def main(): print("jobOptions and release:") print("---------------------") + #Checking jobOptions + JOsList=getJOsList(JOsDict) + if not len(JOsList): + JOsErrors.append("including file \""+MCXX) + else: + if not len(JOsDict["including file \""+MCXX]): + JOsErrors.append("including file \""+MCXX) + #Checking release release="not found" if not len(JOsDict['using release']): -- GitLab