#! /usr/bin/env python

3
import optparse, sys, math, subprocess, os, re, glob
4
5
from collections import OrderedDict

# Needed when running logParser in standalone mode: makes the jo_utils module
# shipped on cvmfs importable even without a local mcjoboptions checkout.
sys.path.append("/cvmfs/atlas.cern.ch/repo/sw/Generators/MCJobOptions/scripts")
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
8
9
import jo_utils

10
# Command-line interface.
# NOTE: the options are parsed at import time, because module-level code below
# (MCXX, location and the bcolors class) reads `opts` while the module loads.
parser = optparse.OptionParser(usage=__doc__)
parser.add_option("-i", "--input", default="-", dest="INPUT_FILE", metavar="PATH",   help="path to input log.generate")
parser.add_option("-j", "--joFile", default=None, dest="JOFILE", metavar="PATH",   help="path to jO file")
parser.add_option("-u", "--nocpu", default=False, dest="SKIPCPU", action="store_true", help="Ignore CPU timing information.")
parser.add_option("-m", "--mcver", dest="MC_VER", default="mc", help="Specify MCXX campaign")
parser.add_option("-c", "--nocolour", action="store_true", dest="NO_COLOUR", default=False, help="Turn off colour for copying to file")
parser.add_option("-s", "--standalone", action="store_true", dest="STANDALONE", default=False, help="Run based on cvmfs location of files (stand-alone, no mcjoboptions locally)")

opts, fileargs = parser.parse_args()

# Prefix of the jO file names searched for in the log ("<campaign>.", "mc." by default)
MCXX='%s.'%opts.MC_VER
# Base directory under which common/BlackList_caches.txt (and the jO) are looked up
location = '/cvmfs/atlas.cern.ch/repo/sw/Generators/MCJobOptions' if opts.STANDALONE else '.'
# Number of requested output events; overwritten by main() once the log is parsed
nEventsRequested=0
23

24
25
26
# define dictionaries with keys as variables to be searched for and values to store the results
# Search strings for the jobOption/release lines of the log.
# Keys are the substrings looked for; values collect the matching text.
JOsDict={
    'using release':[],
    'including file "'+MCXX:[],
    'including file "EvgenJobTransforms/LHEonly.py"':[],
}

# Search strings for generator-specific printouts (MadGraph, Powheg, Pythia8).
# Keys are the substrings looked for; values collect the matching text.
generatorDict={
    'Setting event_norm':[], # used in MG for newer releases
    '"event_norm"':[], # used in MG for older releases
    'Setting nevents':[],
    'No pdf base fragment':[],
    'Generating events from gridpack':[],
    'Integration grid files found locally. Event generation shall continue':[],
    'You asked Pythia8 to modify properties for particle':[]
}

# Search strings for the TestHepMC summary lines
testHepMCDict = {
    'Events passed': [],
    'Efficiency': [],
}

# Search string for the CountHepMC summary line
countHepMCDict = {'Events passing all checks and written': []}

# Search strings for the EvgenFilterSeq summary lines
evgenFilterSeqDict = {
    'Weighted Filter Efficiency': [],
    'Filter Efficiency': [],
}

# Search string for the SimTimeEstimate printout
simTimeEstimateDict = {'RUN INFORMATION': []}

# Search strings for the MetaData lines of the log.
# Every key starts with its own, independent empty result list.
metaDataDict = {
    key: []
    for key in (
        'physicsComment =',
        'generatorName =',
        'generatorTune',
        'specialConfig =',
        'contactPhysicist =',
        'genFilterNames = ',
        'cross-section (nb)',
        'generator =',
        'weights =',
        'PDF =',
        'GenFiltEff =',
        'sumOfNegWeights =',
        'sumOfPosWeights =',
    )
}

# Search strings for the Gen_tf (generate transform) lines of the log.
# 'evgenkeywords' starts as 'found' and is switched to 'not found' by
# checkLine when the log reports that evgenkeywords.txt could not be found.
generateTfDict={
    'ecmEnergy':[],
    'nEventsPerJob':[],
    'Requested output events':[],
    'transform':[],
    'inputFilesPerJob':[],
    'inputGeneratorFile':[],
    'evgenkeywords':['found']
}

# Search strings for the PerfMon (CPU time / memory) lines of the log
perfMonDict = {
    key: []
    for key in ('snapshot_post_fin', 'jobcfg_walltime', 'last -evt vmem')
}

# Event-level test results, grouped by the algorithm that printed them
testDict = dict(
    TestHepMC=testHepMCDict,
    EvgenFilterSeq=evgenFilterSeqDict,
    CountHepMC=countHepMCDict,
    SimTimeEstimate=simTimeEstimateDict,
)

# Function to get jO includes
def getJOsList():
    """Return the names of the jO files (MCXX*.py) that the log reports as included.

    The entries collected under the 'including file "<MCXX>' key of JOsDict are
    stripped of the 'including file' text, quotes, slashes, spaces and any
    'nonStandard/' prefix.

    Returns:
        list of str: one entry per included jO.  When no jO was found the
        result is [''] (a single empty string), not an empty list.
    """
    # Get jO files mc.*.py
    liststr = "|".join(JOsDict["including file \""+MCXX]).replace("nonStandard/","")
    liststr = liststr.replace('/','').replace('"','').replace('including file','').replace(' ','')
    return liststr.split('|')

# Function to check blacklist
def checkBlackList(relFlavour,cache,generatorName,location) :
    """Check a release against <location>/common/BlackList_caches.txt.

    Each usable line of the blacklist holds three comma-separated fields:
    release flavour, cache (release number) and a regular expression that is
    searched for in the generator name.  Blank lines and lines with fewer than
    three fields are skipped instead of raising IndexError.

    Args:
        relFlavour: release flavour of the job, e.g. AtlasProduction, MCProd.
        cache: release number of the job.
        generatorName: generator name taken from the log metadata.
        location: directory that contains common/BlackList_caches.txt.

    Returns:
        A message string for the first matching blacklist entry, or None when
        the release is not blacklisted for this generator.
    """
    with open(location+'/common/BlackList_caches.txt') as bfile:
        for line in bfile:
            fields = [field.strip() for field in line.split(',')]
            if len(fields) < 3:
                continue
            # Blacklisted release flavour, cache and generators
            badRelFlav, badCache, badGens = fields[:3]

            #Match Generator and release type e.g. AtlasProduction, MCProd
            if relFlavour==badRelFlav and cache==badCache and re.search(badGens,generatorName) is not None:
                return f"{relFlavour},{cache} is blacklisted for {generatorName}"
    return None

# Function to parse log.generate lines using a given identifier - result is stored in dictionary
def checkLine(line, lineIdentifier, dict, splitby):
    """Extract values from one log line into a result dictionary.

    Nothing happens unless *lineIdentifier* occurs in *line*.  Otherwise every
    key of *dict* is tried as a search string and, on a match, the extracted
    text is appended to that key's list.  A few keys have dedicated parsing
    rules (jO includes, 'Requested output events', 'inputFilesPerJob',
    'inputGeneratorFile', 'evgenkeywords'); all other keys store the text that
    follows the first *splitby* after the identifier.

    Lines that lack the separator in the 'inputFilesPerJob' and Herwig
    'inputGeneratorFile' cases are skipped rather than raising IndexError.

    Args:
        line: one line of log.generate.
        lineIdentifier: substring that marks lines of interest.
        dict: result dictionary, modified in place (the name shadows the
            builtin but is kept for backward compatibility).
        splitby: separator between the parameter name and its value.
    """
    if lineIdentifier not in line:
        return

    # Text between the first and (if any) second occurrence of the identifier
    afterId = line.split(lineIdentifier)[1]
    joKey = "including file \""+MCXX

    for param in dict:
        if param == joKey:
            if "including file" in line and MCXX in line:
                # Keep only the last path component of the included file
                thing = "".join(afterId.split(splitby)[1:]).split("/")[-1].strip()
                dict[param].append(thing)
        elif param == "Requested output events":
            if param in line:
                # The value is the last space-separated token
                dict[param].append(afterId.split(" ")[-1].strip())
        elif param == "inputFilesPerJob":
            if param in line:
                pieces = line.split(splitby)
                tokens = pieces[1].split() if len(pieces) > 1 else []
                if tokens:
                    dict[param].append(tokens[0])
        elif param == "inputGeneratorFile":
            if param in line:
                if "Herwig" not in line:
                    # Gen_tf print out looks like Py:Gen_tf  INFO inputGeneratorFile used /path/to/file
                    dict[param].append(line.split(" used ")[-1].strip())
                elif "is compressed" in line:
                    # Herwig7_i printout looks like Py:Herwig7_i/Herwig7_LHEF.py INFO inputGeneratorFile 'file1, file2, ...' is compressed - will look for uncompressed LHE file
                    pieces = afterId.split(splitby)
                    tokens = pieces[1].split() if len(pieces) > 1 else []
                    if tokens:
                        dict[param].append(tokens[0].strip('\''))
        elif param == "evgenkeywords":
            if "Could not find evgenkeywords.txt" in line:
                dict[param] = ['not found']
        elif param in line:
            # Generic case: everything after the first separator (later
            # separators are dropped when the pieces are joined)
            dict[param].append("".join(afterId.split(splitby)[1:]).strip())
173

174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226

# For printing
class bcolors:
    """ANSI colour escape sequences used by the log* printing helpers.

    All sequences are empty strings when the --nocolour option is given.
    """
    if opts.NO_COLOUR:
        HEADER = OKBLUE = OKGREEN = WARNING = FAIL = ENDC = ''
    else:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'

    def disable(self):
        """Blank every colour sequence on this instance."""
        for colour in ('HEADER', 'OKBLUE', 'OKGREEN', 'WARNING', 'FAIL', 'ENDC'):
            setattr(self, colour, '')

# For counting errors/warnings
class LogCounts:
    """Running totals of reported problems (incremented by logerr/logwarn)."""
    Errors, Warnings = 0, 0

# Functions that print coloured output
def loginfo(out1,out2):
    """Print out1 followed by out2 in blue; both must be strings."""
    print("{:s}{} {:s}{}".format(out1, bcolors.OKBLUE, out2, bcolors.ENDC))
def loggood(out1,out2):
    """Print out1 followed by out2 in green; both must be strings."""
    print("{:s}{} {:s}{}".format(out1, bcolors.OKGREEN, out2, bcolors.ENDC))
def logerr(out1,out2):
    """Print out1 followed by out2 in red and count one more error."""
    print("{:s}{} {:s}{}".format(out1, bcolors.FAIL, out2, bcolors.ENDC))
    LogCounts.Errors = LogCounts.Errors + 1
def logwarn(out1,out2):
    """Print out1 followed by out2 in yellow and count one more warning."""
    print("{:s}{} {:s}{}".format(out1, bcolors.WARNING, out2, bcolors.ENDC))
    LogCounts.Warnings = LogCounts.Warnings + 1

def pad(seq, target_length, padding=None):
    """Extend seq in place with padding up to target_length and return it.

    A sequence that is already longer than target_length is returned untouched.
    """
    missing = target_length - len(seq)
    if missing >= 0:
        seq.extend([padding] * missing)
    return seq

# Functions for generator-specific tests
# Sherpa checks
def sherpaChecks(logFile):
    """Run the Sherpa-specific checks on a log file.

    Reports every line containing "exceeds maximum by", and the percentages
    listed between a "Retried events" line and the next line containing "}".
    A warning is issued for each entry with more than 5% retried events, and
    when the number of "exceeds maximum" lines times 33 is larger than the
    number of requested events.

    Args:
        logFile: path to the log.generate file.
    """
    numexceeds = 0
    retriedBlock = False
    with open(logFile, "r") as logfile:
        for line in logfile:
            if "exceeds maximum by" in line:
                numexceeds += 1
                loginfo("- "+line.strip(),"")
            if "Retried events" in line:
                retriedBlock = True
                continue
            if not retriedBlock:
                continue
            if "}" in line:
                retriedBlock = False
                continue
            # Entries are expected to contain a quoted name and '-> NN %'
            if '"' not in line or '->' not in line:
                continue
            name = line.split('"')[1]
            percent = line.split('->')[1].split("%")[0].strip()
            try:
                retried = float(percent)
            except ValueError:
                # Not a number: skip the malformed entry instead of aborting
                continue
            if retried > 5.:
                logwarn("- retried events "+name+" = ",percent+" % <-- WARNING: more than 5% of the events retried")
            else:
                loginfo("- retried events "+name+" = ",percent+" %")

    if numexceeds*33>int(nEventsRequested):
        if nEventsRequested:
            logwarn("","WARNING: be aware of: "+str(numexceeds*100./nEventsRequested)+"% of the event weights exceed the maximum by a factor of ten")
        else:
            # Number of requested events unknown (0): no percentage can be
            # computed, so report the absolute count instead of dividing by zero
            logwarn("","WARNING: be aware of: "+str(numexceeds)+" event weights exceed the maximum by a factor of ten")
254

255
# Pythia 8 checks
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def pythia8Checks(logFile,generatorName):
    """Run the Pythia 8 specific checks on a log file.

    Errors are reported when Pythia8_ShowerWeights.py is combined with a
    matching/merging include, and when a generator whose name contains
    "Powheg" does not use Pythia8_Powheg_Main31.py.  A warning is issued when
    the log shows that particle properties were modified.

    Args:
        logFile: path to the log.generate file.
        generatorName: generator name taken from the log metadata.
    """
    usesShowerWeights = False
    usesMatchingOrMerging = False
    usesCorrectPowheg = False
    matchingIncludes = ("Pythia8_aMcAtNlo.py", "Pythia8_CKKWL_kTMerge.py", "Pythia8_FxFx.py")
    with open(logFile,"r") as logfile:
        for line in logfile:
            if "Pythia8_ShowerWeights.py" in line:
                usesShowerWeights = True
            if any(include in line for include in matchingIncludes):
                usesMatchingOrMerging = True
            if "Pythia8_Powheg_Main31.py" in line:
                usesCorrectPowheg = True

    errors = False
    if usesShowerWeights and usesMatchingOrMerging:
        logerr("ERROR:","Pythia 8 shower weights buggy when using a matched/merged calculation. Please remove the Pythia8_ShowerWeights.py include.")
        errors = True
    if "Powheg" in generatorName and not usesCorrectPowheg:
        logerr("ERROR:",generatorName+" used with incorrect include file. Please use Pythia8_Powheg_Main31.py")
        errors = True
    if len(generatorDict['You asked Pythia8 to modify properties for particle']) != 0:
        logwarn("WARNING: modification of particle properties requested: ", ''.join(generatorDict['You asked Pythia8 to modify properties for particle']))
    if not errors:
        loggood("INFO: Pythia 8 checks:","Passed")
280

281
282
# Herwig 7 checks
def herwig7Checks(logFile,generatorName):
    """Run the Herwig 7 specific checks on a log file.

    For generator names containing "7.1" the metadata tune must be one of the
    allowed H7.1 tunes and the log must not mention Herwig7_EvtGen.py.

    Args:
        logFile: path to the log.generate file.
        generatorName: generator name taken from the log metadata.
    """
    errors = False
    allowed_tunes=['H7.1-Default', 'H7.1-SoftTune', 'H7.1-BaryonicReconnection']
    if "7.1" in generatorName:
        tunes = metaDataDict['generatorTune']
        if not tunes:
            # No tune in the metadata: nothing to compare against.  The missing
            # metadata entry itself is reported by main(), so only note the skip
            # here instead of failing with IndexError.
            loginfo("INFO:", "generatorTune metadata is missing - Herwig 7.1 tune check skipped")
        elif tunes[0] not in allowed_tunes:
            logerr("ERROR:", f"Metadata tune set to {tunes[0]}, which is not in the list of allowed tunes: {allowed_tunes}")
            errors = True
        with open(logFile,"r") as logfile:
            # One report is enough, however often the include shows up
            if any("Herwig7_EvtGen.py" in line for line in logfile):
                logerr("ERROR:","Herwig 7.1 used with wrong include: Herwig7_EvtGen.py. Please use Herwig71_EvtGen.py instead.")
                errors = True
    if not errors:
        loggood("INFO: Herwig 7 checks:","Passed")

299
# Madgraph checks
300
def madgraphChecks(logFile):
    """Run the MadGraph specific checks on a log file.

    Checks that nevents is at least 10% above the requested number of events
    (only when no external LHE input is used), that a PDF base fragment was
    included, that the gridpack file is present next to the log when a gridpack
    was used, that event_norm is not 'sum', and that MadWidth is not used for
    the branching ratios of t, w, z or h.

    Args:
        logFile: path to the log.generate file.

    Raises:
        RuntimeError: if no usable 'Setting nevents' value was found in the log
            while no external LHE input is used.
    """
    errors=False
    # Check that the events that MG generates are 10% more than nEventsPerJob
    if not generateTfDict['inputGeneratorFile']: # This check only makes sense if no external LHE inputs are used
        neventsSettings = generatorDict['Setting nevents']
        try:
            neventsMG=int(float(neventsSettings[-1]))
        except (IndexError, ValueError) as err:
            # Only "not set" / "not a number" mean nevents is unusable; other
            # exceptions are no longer converted into this message
            raise RuntimeError("nevents was not set in the MG5aMC jO") from err
        if len(neventsSettings) > 1:
            logwarn("WARNING:","jO seem to be changing nevents multiple times. Check with MG experts if this is acceptable.")
        if neventsMG < int(1.1*nEventsRequested):
            logerr("ERROR:",f"Increase nevents to be generated in MG from {neventsMG} to {int(1.1*nEventsRequested)}")
            errors=True

    # Check if PDF base fragments were included
    if generatorDict['No pdf base fragment']:
        logwarn("WARNING:","No PDF base fragment was included, which is the recommended way to steer pdf and systematics (see https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/MadGraph5aMCatNLOForAtlas#PDF_and_systematics_via_Base_fra)")

    # If gridpack is used check to see if gridpack is found (LHE only jobs produce the gridpack themselves so no need to check this)
    if generatorDict['Generating events from gridpack']:
        logDir = os.path.dirname(logFile)
        if not glob.glob(f"{logDir}/mc_*TeV.*.GRID.tar.gz"):
            logerr("ERROR:",f"Gridpack was used but no mc_*TeV.*.GRID.tar.gz file present in {logDir}")
            errors = True

    # Check whether the correct normalisation is used
    # (newer releases print 'Setting event_norm', older ones '"event_norm"')
    eventNorm = generatorDict['Setting event_norm'] or generatorDict['"event_norm"']
    if eventNorm and eventNorm[-1] == 'sum':
        logwarn("WARNING:","The use of event_norm=sum will almost always result in the sample having the wrong total cross section -- please double check that event_norm=average is set in the param_card.dat.")

    # Warn when MadWidth recalculates the branching ratios of SM particles
    with open(logFile,"r") as logfile:
        for line in logfile:
            if "We need to recalculate the branching fractions" in line:
                br_particles=[p.strip() for p in line.split('for')[-1].split(',')]
                bad_br_particles=[p for p in br_particles if p in ['t','t~','w+','w-','z','h']]
                if len(bad_br_particles)>0:
                    logwarn("WARNING:","MadWidth is used to calculate the branching ratios of {}. This is only LO accurate. For more accurate BRs, please set them explictly in the param_card.dat.".format(",".join(bad_br_particles)))
    if not errors:
        loggood("INFO: MadGraph checks:","Passed")
338

339
340
341
342
343
344
345
346
347
348
# Powheg checks
def powhegChecks(logFile):
    """Run the Powheg specific checks on a log file.

    When the log reports that integration grids were found locally, a
    mc_*TeV.*.GRID.tar.gz file must sit in the same directory as the log.

    Args:
        logFile: path to the log.generate file.
    """
    gridKey = 'Integration grid files found locally. Event generation shall continue'
    if generatorDict[gridKey]:
        logDir = os.path.dirname(logFile)
        gridpacks = glob.glob(f"{logDir}/mc_*TeV.*.GRID.tar.gz")
        if not gridpacks:
            logerr("ERROR:",f"Gridpack was used but no mc_*TeV.*.GRID.tar.gz file present in {logDir}")
            return
    loggood("INFO: Powheg checks:","Passed")

349

350
# Main function
351
352
353
def _print_section(title, rule="---------------------"):
    """Print a blank line, then *title* framed by two horizontal rules."""
    print("")
    print(rule)
    print(title)
    print(rule)


def _release_number(release):
    """Encode a release string 'X.Y.Z' as X*10000 + Y*100 + Z.

    Returns None when *release* is None or its first three dot-separated fields
    are not integers, so that release-dependent checks can be skipped.
    """
    try:
        major, minor, patch = (int(part) for part in release.split(".")[:3])
    except (AttributeError, ValueError):
        return None
    return major*10000 + minor*100 + patch


def main():
    """Parse a log.generate file and report on MC production settings and output.

    Written by Josh McFayden <mcfayden@cern.ch>, Nov 2016.

    The log given with -i/--input is scanned once with checkLine to fill the
    module-level result dictionaries.  The function then prints, in order: the
    jobOptions/release checks, the generate transform parameters, the metadata,
    the generator-specific checks, the event tests, the performance metrics and
    a summary of the number of errors and warnings.  With no input file the
    help text is printed instead.

    Entries missing from the log are reported in the relevant "MISSING ..."
    section; the checks that depend on them are skipped or fall back to a
    default so that the remaining sections are still evaluated.

    Sets the module-level globals nEventsRequested and LHEonly.
    """
    global nEventsRequested, LHEonly

    if opts.INPUT_FILE=="-":
        parser.print_help()
        return

    # Read the log file and check each line
    with open(opts.INPUT_FILE,"r") as logfile:
        for line in logfile:
            checkLine(line,'Py:Athena',JOsDict,'INFO')
            checkLine(line,'MetaData',metaDataDict,'=')
            checkLine(line,'Py:Gen_tf',generateTfDict,'=')
            checkLine(line,'Py:PerfMonSvc',perfMonDict,':')
            checkLine(line,'PMonSD',perfMonDict,'---')
            checkLine(line,'TestHepMC',testHepMCDict,'=')
            checkLine(line,'Py:EvgenFilterSeq',evgenFilterSeqDict,'=')
            checkLine(line,'CountHepMC',countHepMCDict,'=')
            checkLine(line,'SimTimeEstimate',simTimeEstimateDict,'|')
            checkLine(line,'Py:MadGraphUtils',generatorDict,'=')
            checkLine(line,'Py:MadGraphSysUtils',generatorDict,'WARNING !!!')
            checkLine(line,'Py:PowhegControl',generatorDict,'INFO')
            checkLine(line,'Py:newP8util',generatorDict,'WARNING')
            # This is a slight abuse (extract inputGeneratorFile printed from H7 interface and put it into transform dictionary)
            checkLine(line,'Py:Herwig7_i/Herwig7_LHEF.py',generateTfDict,'inputGeneratorFile')
            # Again an abuse to extract number of events for MadGraph which are printed like "05:14:02      Nb of events :  20000"
            checkLine(line,'Nb of events',generatorDict,':')

    ### Special generators that are to be excluded from certain tests
    specialGenerators=["Superchic", "Starlight", "ParticleGun"]

    ### Check if it's a LHE-only jO
    LHEonly = bool(JOsDict["including file \"EvgenJobTransforms/LHEonly.py\""])

    ### jO checks
    JOsErrors=[]
    _print_section("jobOptions and release:")

    # Checking jobOptions
    mcJOKey = "including file \""+MCXX
    JOsList=getJOsList()
    if not JOsList or not JOsDict[mcJOKey]:
        JOsErrors.append(mcJOKey)

    loginfo('- LHEonly = ',f'{LHEonly}')

    # Extract generator name (empty when the metadata lacks it; the gap is
    # reported in the metadata section below)
    generatorNames = metaDataDict['generatorName =']
    generatorName = generatorNames[0] if generatorNames else ""

    # Checking release
    release = None
    releaseEntries = JOsDict['using release']
    if not releaseEntries:
        JOsErrors.append('using release')
    else:
        tokens = releaseEntries[0].replace('using release','').strip().split()
        val = (tokens[0] if tokens else '').replace('[','').replace(']','')
        parts = val.split('-')
        flavour = parts[0]
        release = parts[1] if len(parts) > 1 else ''
        blacklisted=checkBlackList(flavour,release,generatorName,location=location)

        # check that release is AthGeneration
        if flavour != "AthGeneration":
            logerr('- using release = ',f"{val} <-- ERROR: AthGeneration should be used instead of {flavour}")
        elif blacklisted:
            # check blacklist
            logerr('- using release = ', f"{val} <-- ERROR: {val} is blacklisted")
        else:
            loggood('- using release = ', f"{val}")

    if JOsErrors:
        print("---------------------")
        print("MISSING JOs:")
        for i in JOsErrors:
            if i == mcJOKey:
                logerr("",f"ERROR: jO not found! (log.generate should contain lines like: including file \"{MCXX}*.py\")")
            else:
                logwarn("","WARNING: %s is missing!"%i)

    ### Generate transform checks
    generateErrors=[]
    _print_section("Generate transform params:")

    # add default energy - for backwards compatibility otherwise logParser would crash - to fix
    generateTfDict["ecmEnergy"].append("13000")

    optionalTests=['inputGeneratorFile']
    nEventsPerJob = None
    for key in list(generateTfDict):
        val=generateTfDict[key]
        if not val:
            if key not in optionalTests:
                generateErrors.append(key)
            continue

        if key == 'nEventsPerJob':
            # Allow to overwrite nEventsPerJob from jO file if specified (or present)
            if opts.JOFILE:
                with open(opts.JOFILE, 'r') as jof:
                    jOContent = jof.read()
                nEventsPerJob=jo_utils._read_param_from_jo(jOContent, ['evgenConfig.nEventsPerJob'])
            else:
                # No jO file specified, still try to overwrite from JO in same dir
                jOName = JOsList[0] if JOsList else ''
                jOFile=os.path.join(location,os.path.dirname(opts.INPUT_FILE),jOName)
                # isfile, not exists: with an empty jO name the path is the directory itself
                if os.path.isfile(jOFile):
                    with open(jOFile, 'r') as jof:
                        jOContent = jof.read()
                    nEventsPerJob=jo_utils._read_param_from_jo(jOContent, ['evgenConfig.nEventsPerJob'])
                else:
                    nEventsPerJob=str(val[0]).split('#')[0].strip()
            if not nEventsPerJob:
                logwarn("WARNING:", f"evgenConfig.nEventsPerJob is not defined in the jO. Will set to default: 10000")
                nEventsPerJob=10000
            val=nEventsPerJob
            generateTfDict['nEventsPerJob'] = val
        elif key == 'transform':
            val=val[0]
            # The release may be unknown or unparsable; the transform/release
            # consistency check is then skipped
            releaseNumber=_release_number(release)
            if val not in ('Gen_tf', 'Gen_tf_txt') and releaseNumber is not None and releaseNumber > 210610:
                logerr(f"- {key} = ",f"{val} <- ERROR: tranform = {val} and release is {release}. Please use Gen_tf or Gen_tf_txt!")
                continue
        elif key == 'evgenkeywords' and val[0] != 'found':
            logwarn(f"- {key} = ",f"{val[0]} <- WARNING: Keyword check has not been performed. Please check that the keywords used in the jobOption are in the allowed list of keywords: https://gitlab.cern.ch/atlas/athena/-/blob/21.6/Generators/EvgenJobTransforms/share/file/evgenkeywords.txt")
            continue
        elif key == 'inputFilesPerJob':
            val=val[0]
            # Hard limit from ADC is 1000 files
            # NOTE(review): the check and its message use 100, not 1000 - confirm which is intended
            if int(val) > 100:
                logerr(f"- {key} = ",f"{val} <- ERROR: Need to use less than 100.")
                continue
        else:
            val=val[0]
        loginfo(f"- {key} = ",f"{val}")

    if nEventsPerJob is None:
        # nEventsPerJob was not found in the log (reported below as missing);
        # use the default so that the event and CPU checks can still run
        nEventsPerJob=10000

    # Check if input files are used that inputFilesPerJob matches the number of input files
    inputFiles = generateTfDict['inputGeneratorFile']
    filesPerJob = generateTfDict['inputFilesPerJob']
    if inputFiles and filesPerJob and len(inputFiles[0].split(',')) != int(filesPerJob[0]):
        logerr("", f"ERROR: {len(inputFiles[0].split(','))} input files used while inputFilesPerJob={filesPerJob[0]}")

    if generateErrors:
        print("---------------------")
        print("MISSING Generate params:")
        for i in generateErrors:
            logerr("","ERROR: %s is missing!"%i)

    # Number of requested output events (left unchanged when the log lacks it;
    # the gap has been reported above)
    requestedEvents = generateTfDict["Requested output events"]
    if requestedEvents:
        nEventsRequested=int(requestedEvents[0])

    ### Metadata checks
    metaDataErrors=[]
    _print_section("Metadata:")
    for key in metaDataDict:
        name=key.replace("=","").strip()
        val=metaDataDict[key]
        if not val:
            metaDataErrors.append(name)
            continue
        if name=="contactPhysicist":
            if '@' in "".join(val):
                loggood( '- '+name+' = ',"".join(val))
            else:
                logerr( '- '+name+' = ',"".join(val)+"  <-- ERROR: No email found")
            continue
        if name=="cross-section (nb)" and float(val[0]) < 0:
            logwarn( '- '+name+' = ',"".join(val)+"  <-- WARNING: Cross-section is negative")
            continue
        loginfo( '- '+name+' = ',"".join(val))

    if metaDataDict["sumOfPosWeights ="] and metaDataDict["sumOfNegWeights ="]:
        sumNeg = float(metaDataDict["sumOfNegWeights ="][0])
        sumAll = float(metaDataDict["sumOfPosWeights ="][0]) + sumNeg
        # A vanishing total gives no meaningful ratio (and would divide by zero)
        if sumAll != 0:
            ratio = sumNeg*1.0/sumAll
            if ratio>0.15:
                logwarn( '- sumOfNegWeights/(sumOfPosWeights+sumOfNegWeights) = ',str(ratio)+"  <-- WARNING: more than 15% of the weights are negative")

    if metaDataErrors:
        print("---------------------")
        print("MISSING Metadata:")
        isSpecial = any(gen in generatorName for gen in specialGenerators)
        optionalMetaData = ("weights", "genFilterNames", "generator", "PDF", "sumOfNegWeights", "sumOfPosWeights")
        for i in metaDataErrors:
            if i in optionalMetaData \
               or (i in ("cross-section (nb)", "generatorTune") and isSpecial) \
               or (i=="generatorTune" and LHEonly):
                loginfo("INFO:","%s is missing"%i)
            else:
                logerr("","ERROR: %s is missing!"%i)

    ### Generator specific tests
    _print_section(f"Generator specific tests: {generatorName}", rule="-------------------------")
    if "Sherpa" in generatorName:
        sherpaChecks(opts.INPUT_FILE)
    if "Pythia8" in generatorName:
        pythia8Checks(opts.INPUT_FILE,generatorName)
    if "Herwig7" in generatorName:
        herwig7Checks(opts.INPUT_FILE,generatorName)
    if "MadGraph" in generatorName or "aMcAtNlo" in generatorName:
        madgraphChecks(opts.INPUT_FILE)
    if "Powheg" in generatorName:
        powhegChecks(opts.INPUT_FILE)

    ###  Event tests
    testErrors=[]
    CountHepMC=0
    nEventsAllowedInProduction = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
    _print_section("Event tests:")
    for dictkey in testDict:
        for key in testDict[dictkey]:
            name=key
            val=testDict[dictkey][key]
            if not val:
                testErrors.append("%s %s"%(dictkey,name))
                continue
            #Check final Nevents processed
            if dictkey=="CountHepMC":
                CountHepMC=int(val[0])
                # For LHE only runs, CountHepMC will always be 1 (only 1 event showered) so we set this equal to the number of events requested
                # which represents the number of events written in the LHE
                if LHEonly:
                    CountHepMC=nEventsRequested
                    continue # We also don't want to perform the following checks
                # The production job always runs nEventsPerJob events, whether or
                # not the test job ran the same number
                tmp=int(nEventsPerJob)
                if not (tmp in nEventsAllowedInProduction or tmp % 10000 == 0):
                    logerr( '- '+dictkey+" "+name+' = ', f"{tmp}  <-- ERROR: Not an acceptable number of events for production (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000) or multiple of 10k")
                elif CountHepMC != nEventsRequested:
                    logerr( '- '+dictkey+" "+name+' = ', f"{val[0]}  <-- ERROR: This is not equal to Requested output events={nEventsRequested}")
                else:
                    loggood( '- '+dictkey+" "+name+' = ',"".join(val))
                continue
            if dictkey=="TestHepMC" and name=="Efficiency":
                efficiency = float(val[0].replace('%',''))
                if efficiency<100. and efficiency>=98.:
                    logwarn( '- '+dictkey+" "+name+' = ',"".join(val))
                elif efficiency<100.:
                    logerr( '- '+dictkey+" "+name+' = ',"".join(val))
                else:
                    loggood( '- '+dictkey+" "+name+' = ',"".join(val))
                continue
            loginfo( '- '+dictkey+" "+name+' = ',"".join(val))

    if testErrors:
        print("---------------------")
        print("Failed tests:")
        for i in testErrors:
            if i =="SimTimeEstimate RUN INFORMATION":
                logwarn("","WARNING: %s is missing!"%i)
            # TestHepMC is not needed for special generators or when running LHE only
            elif ("TestHepMC" in i) and ((any(gen in generatorName for gen in specialGenerators)) or LHEonly):
                logwarn("",f"WARNING: {i} is missing")
            else:
                logerr("","ERROR: %s is missing!"%i)

    ### Performance tests
    cpuPerJob=0.0
    perfMonErrors=[]
    _print_section("Performance metrics:")
    for key in perfMonDict:
        name=key
        val=perfMonDict[key]
        if not val:
            perfMonErrors.append(name)
            continue
        if key == 'snapshot_post_fin' and not opts.SKIPCPU:
            name = 'CPU'
            tmp=float(val[0].split()[3])
            if perfMonDict['jobcfg_walltime']:
                tmp+=float(perfMonDict['jobcfg_walltime'][0].split()[1].split('=')[1])
            # the division assumes the timings are in milliseconds - TODO confirm
            cpuPerJob=tmp/(1000.*60.*60.)
            # Calculate timing and extrapolate if test run was run with less events
            # (no extrapolation is possible when the number of processed events is 0)
            if CountHepMC and CountHepMC != int(nEventsPerJob):
                print(f"- actual CPU ({CountHepMC} events) = {cpuPerJob:.2f} hrs")
                cpuPerJob=float(nEventsPerJob)*cpuPerJob/float(CountHepMC)
                loginfo(f"- CPU extrapolated to {nEventsPerJob} events =", f"{cpuPerJob:.1f} hrs")
            # A short job is only a problem when fewer than 10k events per job are produced
            fewEventsPerJob = int(nEventsPerJob) < 10000
            if cpuPerJob > 18.:
                logerr( f"- {name} = ",f"{cpuPerJob:.2f} hrs  <-- ERROR: Too high CPU time - should be between 6-12h. Adjust nEventsPerJob!")
            elif cpuPerJob >= 6. and cpuPerJob <= 12.:
                loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
            elif cpuPerJob < 1.:
                if fewEventsPerJob and not LHEonly:
                    logerr( f"- {name} = ",f"{cpuPerJob:.2f} hrs <-- ERROR: Too low CPU time - should be between 6-12h. Adjust nEventsPerJob!")
                else:
                    loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
            else:
                if fewEventsPerJob:
                    logwarn( f"- {name} = ",f"{cpuPerJob:.2f} hrs  <-- WARNING: CPU time not optimal - should be between 6-12h. Adjust nEventsPerJob!")
                else:
                    loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
            # Also print timing information for CI - CI runs max(1,0.01*nEventsPerJob)
            CICPU=max(1,0.01*float(nEventsPerJob))*cpuPerJob/float(nEventsPerJob)
            print(f"- estimated CPU for CI job = {CICPU:.2f} hrs")

        if key == 'last -evt vmem':
            name = 'Virtual memory'
            tmp=float(val[0].split()[0])
            if tmp >= 8000:
                logerr( '- '+name+' = ',"".join(val)+"  <-- ERROR: Too high memory usage")
            elif tmp > 4000:
                logwarn( '- '+name+' = ',"".join(val)+"  <-- WARNING: High memory usage - alert MC production team")
            else:
                loggood( '- '+name+' = ',"".join(val))

    if perfMonErrors:
        print("---------------------")
        print("MISSING Performance metric:")
        for i in perfMonErrors:
            logerr("ERROR:", f"{i} is missing!")

    # Print total number of Errors/Warnings
    _print_section(" Summary:")
    counts = "Errors : "+str(LogCounts.Errors)+" , Warnings : "+str(LogCounts.Warnings)
    if LogCounts.Errors != 0:
        logerr(counts," -> Errors encountered! Not ready for production!")
    elif LogCounts.Warnings != 0:
        logwarn(counts," -> Some warnings encountered, check that these are ok before submitting production!")
    else:
        loggood(counts," -> OK for production")
    print("")
    
704
    
705
706
# Entry point when the file is executed as a script
if __name__ == "__main__":
    main()
707
708