logParser.py 34.6 KB
Newer Older
1
2
#! /usr/bin/env python

Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
3
import optparse, sys, math, subprocess, os, re, glob
4
5
from collections import OrderedDict

Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
6
# This might be needed when running logParser in standalone mode
7
sys.path.append("/cvmfs/atlas.cern.ch/repo/sw/Generators/MCJobOptions/scripts")
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
8
9
import jo_utils

10
# Command-line interface.  The options are parsed at import time because the
# module-level settings below (and the bcolors class) read them directly.
parser = optparse.OptionParser(usage=__doc__)
parser.add_option("-i", "--input", default="-", dest="INPUT_FILE", metavar="PATH",
                  help="path to input log.generate")
parser.add_option("-j", "--joFile", default=None, dest="JOFILE", metavar="PATH",
                  help="path to jO file")
parser.add_option("-u", "--nocpu", default=False, dest="SKIPCPU", action="store_true",
                  help="Ignore CPU timing information.")
parser.add_option("-m", "--mcver", dest="MC_VER", default="mc",
                  help="Specify MCXX campaign")
parser.add_option("-c", "--nocolour", action="store_true", dest="NO_COLOUR", default=False,
                  help="Turn off colour for copying to file")
parser.add_option("-s", "--standalone", action="store_true", dest="STANDALONE", default=False,
                  help="Run based on cvmfs location of files (stand-alone, no mcjoboptions locally)")

opts, fileargs = parser.parse_args()

# Prefix of the jobOption file names, e.g. "mc." for the default --mcver value
MCXX='%s.'%opts.MC_VER
# Directory under which the common/ black- and purple-list files are looked up
location = '/cvmfs/atlas.cern.ch/repo/sw/Generators/MCJobOptions' if opts.STANDALONE else '.'
# Placeholder; main() overwrites it with the value read from the log
nEventsRequested=0
23

24
25
26
# define dictionaries with keys as variables to be searched for and values to store the results
# Each value is a list that checkLine() appends the extracted strings to.
JOsDict={
    'using release':[],
    'including file "'+MCXX:[],  # jobOption includes, e.g. including file "mc.<...>.py"
    'including file "EvgenJobTransforms/LHEonly.py"':[],
    'including file "runargs.afterburn.py"':[],
    'ATHENA_PROC_NUMBER':[]
}

# Generator-specific printouts (MadGraph, Powheg, Pythia8)
generatorDict={
    'Setting event_norm':[], # used in MG for newer releases
    '"event_norm"':[], # used in MG for older releases
    'Setting nevents':[],
    'No pdf base fragment':[],
    'Generating events from gridpack':[],
    'Integration grid files found locally. Event generation shall continue':[],
    'You asked Pythia8 to modify properties for particle':[]
}

# TestHepMC summary lines
testHepMCDict={
    'Events passed':[],
    'Efficiency':[]
}

# CountHepMC summary lines
countHepMCDict={
    'Events passing all checks and written':[]
}

# EvgenFilterSeq summary lines
evgenFilterSeqDict={
    'Filter Efficiency':[]
}

# SimTimeEstimate summary lines
simTimeEstimateDict={
    'RUN INFORMATION':[]
}

# MetaData lines; several keys deliberately include the " =" (and its
# surrounding spaces) so that they match the exact text of the log line.
metaDataDict={
    'physicsComment =':[],
    'generatorName =':[],
    'generatorTune':[],
    'specialConfig =':[],
    'contactPhysicist =':[],
    'genFilterNames = ':[],
    'cross-section (nb)':[],
    'generator =':[],
    'weights =':[],
    'PDF =':[],
    'GenFiltEff =':[],
    'sumOfNegWeights =':[],
    'sumOfPosWeights =':[],
    'Number of input LHE events  =':[],
    'randomSeed =':[],
}

# Transform (Gen_tf / Generate_ab) parameters
generateTfDict={
    'ecmEnergy':[],
    'nEventsPerJob':[],
    'Requested output events':[],
    'transform':[],
    'inputFilesPerJob':[],
    'inputGeneratorFile':[],
    # starts as 'found'; checkLine() replaces it with ['not found'] when the
    # log contains "Could not find evgenkeywords.txt"
    'evgenkeywords':['found'],
    'inputEVNT_PreFile':[]
}

# PerfMon printouts
perfMonDict={
    'snapshot_post_fin':[],
    'jobcfg_walltime':[],
    'last -evt vmem':[]
}

# Groups the per-algorithm test dictionaries under a display name
testDict = {
    'TestHepMC':testHepMCDict,
    'EvgenFilterSeq':evgenFilterSeqDict,
    'CountHepMC':countHepMCDict,
    'SimTimeEstimate':simTimeEstimateDict
}

# Function to get jO includes
def getJOsList():
    """Return the jobOption file names recorded in ``JOsDict``.

    The entries stored under the ``'including file "' + MCXX`` key are joined,
    stripped of ``nonStandard/``, slashes, double quotes, the words
    ``including file`` and blanks, and returned as a list.

    Returns:
        list of str.  When no include was recorded the result is ``['']``
        (a single empty string) rather than an empty list, because it is
        produced by ``str.split``.
    """
    # Get jO files mc.*.py
    includes = JOsDict["including file \""+MCXX]
    # joining an empty list yields '', so no special case is needed
    liststr = "|".join(includes).replace("nonStandard/","")
    for unwanted in ('/', '"', 'including file', ' '):
        liststr = liststr.replace(unwanted, '')
    return liststr.split('|')

# Function to check blacklist
def checkBlackList(relFlavour,cache,generatorName,location) :
    """Tell whether a release/generator combination is black-listed.

    Reads ``<location>/common/BlackList_caches.txt``.  Each entry is a
    comma-separated line: release flavour, cache, generator pattern.

    Args:
        relFlavour: release flavour, compared for equality with column 1.
        cache: release number, compared for equality with column 2.
        generatorName: generator name; column 3 is used as a regular
            expression searched for in this string.
        location: directory that contains the ``common`` folder.

    Returns:
        True if any line matches all three criteria, otherwise False.

    Raises:
        OSError: if the black-list file cannot be opened.
    """
    with open(location+'/common/BlackList_caches.txt') as bfile:
        for line in bfile:
            fields = [field.strip() for field in line.split(',')]
            # Blank lines and lines with fewer than three columns cannot
            # describe an entry; skip them instead of failing on the index.
            if len(fields) < 3:
                continue
            # Blacklisted release flavour, cache and generators
            badRelFlav, badCache, badGens = fields[:3]

            #Match Generator and release type e.g. AtlasProduction, MCProd
            if relFlavour==badRelFlav and cache==badCache and re.search(badGens,generatorName) is not None:
                return True
    return False

# Function to check purple list
def checkPurpleList(relFlavour,cache,generatorName,location) :
    """Tell whether a release/generator combination is purple-listed.

    Reads ``<location>/common/PurpleList_generators.txt``.  Each entry is a
    comma-separated line: release flavour, cache, generator pattern, reason.

    Args:
        relFlavour: release flavour, compared for equality with column 1.
        cache: release number, compared for equality with column 2.
        generatorName: generator name; column 3 is used as a regular
            expression searched for in this string.
        location: directory that contains the ``common`` folder.

    Returns:
        ``(True, reason)`` for the first matching line (``reason`` is
        column 4), otherwise ``(False, None)``.

    Raises:
        OSError: if the purple-list file cannot be opened.
    """
    with open(location+'/common/PurpleList_generators.txt') as bfile:
        for line in bfile:
            fields = [field.strip() for field in line.split(',')]
            # Blank lines and lines with fewer than four columns cannot
            # describe an entry; skip them instead of failing on the index.
            if len(fields) < 4:
                continue
            # Purple-listed release flavour, cache, generators and the reason
            badRelFlav, badCache, badGens, reason = fields[:4]

            #Match Generator and release type e.g. AtlasProduction, MCProd
            if relFlavour==badRelFlav and cache==badCache and re.search(badGens,generatorName) is not None:
                return True, reason
    return False, None
152
153
154
155
156
157
158
159
160
161
162
163

# Function to parse log.generate lines using a given identifier - result is stored in dictionary
def checkLine(line, lineIdentifier, dict, splitby):
    """Scan one log line and store any recognised value in ``dict``.

    Nothing happens unless ``lineIdentifier`` occurs in ``line``.  Otherwise
    every key of ``dict`` is treated as a search parameter: when the line
    matches the parameter, the text extracted from the line is appended to
    ``dict[param]``.  A few parameters have their own extraction rule (see
    the branches below); all others take the text that follows ``splitby``
    after ``lineIdentifier``.

    Args:
        line: one line of the log file.
        lineIdentifier: substring that marks the line as relevant.
        dict: result dictionary mapping parameter -> list of strings; it is
            modified in place.  (The name shadows the builtin; it is kept
            so that the signature stays unchanged.)
        splitby: separator between the parameter name and its value.

    Raises:
        IndexError: from the 'inputFilesPerJob' and Herwig
            'inputGeneratorFile' rules if ``splitby`` is missing where the
            rule expects it.
    """
    if lineIdentifier not in line:
        return

    for param in dict:
        if param=="including file \""+MCXX:
            if "including file" in line and MCXX in line:
                # str.split with a separator never returns an empty list, so
                # the former "bad entry" length check could not fire.
                # Keep only the last path component of the included file.
                thing="".join(line.split(lineIdentifier)[1].split(splitby)[1:]).split("/")[-1].strip()
                dict[param].append(thing)
        elif param=="Requested output events":
            if "Requested output events" in line:
                # value is the last blank-separated token of the line
                thing=line.split(lineIdentifier)[1].split(" ")[-1].strip()
                dict[param].append(thing)
        elif param=="inputFilesPerJob":
            if "inputFilesPerJob" in line:
                thing=line.split(splitby)[1].split()[0]
                dict[param].append(thing)
        elif param=="inputGeneratorFile":
            if "inputGeneratorFile" in line:
                if "Herwig" not in line:
                    # Gen_tf print out looks like Py:Gen_tf  INFO inputGeneratorFile used /path/to/file
                    if "used" in line:
                        thing=line.split(" used ")[-1].strip()
                    else:
                        thing=line.split(" = ")[-1].strip()
                    dict[param].append(thing)
                elif "is compressed" in line:
                    # Herwig7_i printout looks like Py:Herwig7_i/Herwig7_LHEF.py INFO inputGeneratorFile 'file1, file2, ...' is compressed - will look for uncompressed LHE file
                    thing=line.split(lineIdentifier)[1].split(splitby)[1].split()[0].strip('\'')
                    dict[param].append(thing)
        elif param=="evgenkeywords":
            # this entry is a flag, not a list of values: it is replaced
            if "Could not find evgenkeywords.txt" in line:
                dict[param]=['not found']
        else:
            if param in line:
                # generic rule: everything after the first separator, with
                # any further separators dropped by the join
                thing="".join(line.split(lineIdentifier)[1].split(splitby)[1:]).strip()
                dict[param].append(thing)
200

201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252

# For printing
class bcolors:
    """Terminal colour codes used by the log helpers.

    Every attribute is an ANSI escape sequence, or an empty string when the
    --nocolour option was given.
    """
    HEADER = '' if opts.NO_COLOUR else '\033[95m'
    OKBLUE = '' if opts.NO_COLOUR else '\033[94m'
    OKGREEN = '' if opts.NO_COLOUR else '\033[92m'
    WARNING = '' if opts.NO_COLOUR else '\033[93m'
    FAIL = '' if opts.NO_COLOUR else '\033[91m'
    ENDC = '' if opts.NO_COLOUR else '\033[0m'

    def disable(self):
        """Blank all colour codes on this instance (class attributes are untouched)."""
        for code_name in ('HEADER', 'OKBLUE', 'OKGREEN', 'WARNING', 'FAIL', 'ENDC'):
            setattr(self, code_name, '')

# For counting errors/warnings
class LogCounts:
    """Class-level tallies of the errors and warnings printed so far."""
    Errors = Warnings = 0

# Functions that print coloured output
def loginfo(out1,out2):
    """Print ``out1`` followed by a blank and ``out2``, the latter in blue. Both must be str."""
    message = "{:s}{} {:s}{}".format(out1, bcolors.OKBLUE, out2, bcolors.ENDC)
    print(message)
def loggood(out1,out2):
    """Print ``out1`` followed by a blank and ``out2``, the latter in green. Both must be str."""
    message = "{:s}{} {:s}{}".format(out1, bcolors.OKGREEN, out2, bcolors.ENDC)
    print(message)
def logerr(out1,out2):
    """Print ``out1`` and ``out2`` (in the FAIL colour) and count one more error. Both must be str."""
    message = "{:s}{} {:s}{}".format(out1, bcolors.FAIL, out2, bcolors.ENDC)
    print(message)
    LogCounts.Errors = LogCounts.Errors + 1
def logwarn(out1,out2):
    """Print ``out1`` and ``out2`` (in the WARNING colour) and count one more warning. Both must be str."""
    message = "{:s}{} {:s}{}".format(out1, bcolors.WARNING, out2, bcolors.ENDC)
    print(message)
    LogCounts.Warnings = LogCounts.Warnings + 1

def pad(seq, target_length, padding=None):
    """Extend ``seq`` in place with ``padding`` up to ``target_length`` items.

    A sequence that is already longer than ``target_length`` is left as is.

    Returns:
        The same ``seq`` object that was passed in.
    """
    missing = target_length - len(seq)
    if missing >= 0:
        seq.extend(padding for _ in range(missing))
    return seq

# Functions for generator-specific tests
# Sherpa checks
253
def sherpaChecks(logContent):
    """Run the Sherpa-specific checks on the log and print the findings.

    * every line containing "exceeds maximum by" is printed and counted;
    * inside the block that follows a "Retried events" line (up to the next
      line containing "}"), each entry of the form ``"name" -> X %`` is
      reported, with a warning when X is above 5;
    * a final warning is issued when 33 times the number of
      "exceeds maximum" lines is larger than the module-level
      ``nEventsRequested``.

    Args:
        logContent: iterable with the lines of the log file.
    """
    numexceeds = 0
    retriedBlock = False
    # check each line
    for line in logContent:
        if "exceeds maximum by" in line:
            numexceeds += 1
            loginfo("- "+line.strip(),"")
        if "Retried events" in line:
            retriedBlock = True
            continue
        if not retriedBlock:
            continue
        if "}" in line:
            # closing brace ends the retried-events block
            retriedBlock = False
            continue
        # entries need both a quoted name and a "->" followed by the percentage
        if '"' not in line or '->' not in line:
            continue
        name = line.split('"')[1]
        percent = line.split('->')[1].split("%")[0].strip()
        if float(percent) > 5.:
            logwarn("- retried events "+name+" = ",percent+" % <-- WARNING: more than 5% of the events retried")
        else:
            loginfo("- retried events "+name+" = ",percent+" %")
    if numexceeds*33>int(nEventsRequested):
        logwarn("","WARNING: be aware of: "+str(numexceeds*100./nEventsRequested)+"% of the event weights exceed the maximum by a factor of ten")
279

280
# Pythia 8 checks
281
def pythia8Checks(logContent,generatorName,pl_reason):
    """Run the Pythia 8 specific checks on the log and print the findings.

    Args:
        logContent: iterable with the lines of the log file.
        generatorName: generator name from the metadata; the Pythia 8
            version is taken from a ``Pythia8(v.<number>...)`` substring
            (0 is used when no such substring is present).
        pl_reason: reason string from the purple list, or a false value if
            the release is not purple-listed.
    """
    usesShowerWeights = False
    usesMatchingOrMerging = False
    usesCorrectPowheg = False
    usesCKKWL = False
    errors = False
    # extract pythia version from generator name
    # (raw string: "\(" is not a valid escape in a normal string literal)
    versionString = re.search(r'Pythia8\(v\.(.+?)[p\)]', generatorName)
    p8ver = int(versionString.group(1)) if versionString else 0
    # note which include files / features show up anywhere in the log
    for line in logContent:
        if "Pythia8_ShowerWeights.py" in line:
            usesShowerWeights = True
        if "Pythia8_aMcAtNlo.py" in line or "Pythia8_CKKWL_kTMerge.py" in line or "Pythia8_FxFx.py" in line:
            usesMatchingOrMerging = True
        if "Pythia8_Powheg_Main31.py" in line or "PowhegControl_bblvlv_Common.py" in line:
            usesCorrectPowheg = True
        if "CKKW-L merge" in line:
            usesCKKWL = True
    if usesShowerWeights and usesMatchingOrMerging and p8ver < 307:
        logerr("ERROR:","Pythia 8 shower weights buggy when using a matched/merged calculation. Please remove the Pythia8_ShowerWeights.py include.")
        errors = True
    if "Powheg" in generatorName and not usesCorrectPowheg:
        logerr("ERROR:",generatorName+" used with incorrect include file. Please use Pythia8_Powheg_Main31.py")
        errors = True
    if len(generatorDict['You asked Pythia8 to modify properties for particle']) != 0:
        logwarn("WARNING: modification of particle properties requested: ", ''.join(generatorDict['You asked Pythia8 to modify properties for particle']))
    # For purple-listed releases check whether CKKW-L is the reason for purple-listing and the jO is using CKKW-L
    if pl_reason and (usesCKKWL and "CKKW" in pl_reason):
        logwarn("WARNING:", "jO uses CKKW-L merging which is problematic in the athena release you are using. Consider changing release.")
    # warnings alone do not prevent the "Passed" message
    if not errors:
        loggood("INFO: Pythia 8 checks:","Passed")
312

313
# Herwig 7 checks
314
def herwig7Checks(logContent,generatorName):
    """Run the Herwig 7 specific checks on the log and print the findings.

    The checks only apply when "7.1" occurs in ``generatorName``: the
    metadata tune must be one of the allowed tunes, and the log must not
    mention the Herwig7_EvtGen.py include.

    Args:
        logContent: iterable with the lines of the log file.
        generatorName: generator name from the metadata.
    """
    errors = False
    allowed_tunes=['H7.1-Default', 'H7.1-SoftTune', 'H7.1-BaryonicReconnection']
    if "7.1" in generatorName:
        # The tune may be absent from the metadata; report that as a failed
        # check (tune shown as None) instead of failing on an empty list.
        tunes = metaDataDict['generatorTune']
        tune = tunes[0] if tunes else None
        if tune not in allowed_tunes:
            logerr("ERROR:", f"Metadata tune set to {tune}, which is not in the list of allowed tunes: {allowed_tunes}")
            errors = True
        for line in logContent:
            if "Herwig7_EvtGen.py" in line:
                logerr("ERROR:","Herwig 7.1 used with wrong include: Herwig7_EvtGen.py. Please use Herwig71_EvtGen.py instead.")
                errors = True
                # one report is enough
                break
    if not errors:
        loggood("INFO: Herwig 7 checks:","Passed")

329
# Madgraph checks
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
330
def madgraphChecks(logFile, logContent):
    """Run the MadGraph specific checks on the log and print the findings.

    Args:
        logFile: path of the log file; its directory is searched for the
            gridpack tarball.
        logContent: iterable with the lines of the log file.

    Raises:
        RuntimeError: if the number of events set for MadGraph is needed
            but cannot be read from the parsed log values.
    """
    errors=False
    # Check that the events that MG generates are 10% more than nEventsPerJob
    if not generateTfDict['inputGeneratorFile'] and not LHEonly: # This check only makes sense if no external LHE inputs are used and it's not an LHE-only job
        neventsSettings = generatorDict['Setting nevents']
        try:
            neventsMG=int(float(neventsSettings[-1]))
        except (IndexError, ValueError) as err:
            # no (or no numeric) "Setting nevents" entry was found in the log
            raise RuntimeError("nevents was not set in the MG5aMC jO") from err
        if len(neventsSettings) > 1:
            logwarn("WARNING:","jO seem to be changing nevents multiple times. Check with MG experts if this is acceptable.")
        if neventsMG < int(1.1*nEventsRequested):
            logerr("ERROR:",f"Increase nevents to be generated in MG from {neventsMG} to {int(1.1*nEventsRequested)}")
            errors=True
    # Check if PDF base fragments were included
    if generatorDict['No pdf base fragment']:
        logwarn("WARNING:","No PDF base fragment was included, which is the recommended way to steer pdf and systematics (see https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/MadGraph5aMCatNLOForAtlas#PDF_and_systematics_via_Base_fra)")
    # If gridpack is used check to see if gridpack is found (LHE only jobs produce the gridpack themselves so no need to check this)
    if generatorDict['Generating events from gridpack']:
        # A bare file name has an empty dirname; fall back to the current
        # directory so the search does not end up in the filesystem root.
        logDir = os.path.dirname(logFile) or '.'
        if not glob.glob(os.path.join(logDir, "mc_*TeV.*.GRID.tar.gz")):
            logerr("ERROR:",f"Gridpack was used but no mc_*TeV.*.GRID.tar.gz file present in {logDir}")
            errors = True
    # Check whether the correct normalisation is used
    # (the newer-release printout takes precedence over the older one)
    eventNorm = generatorDict['Setting event_norm'] or generatorDict['"event_norm"']
    if eventNorm and eventNorm[-1] == 'sum':
        logwarn("WARNING:","The use of event_norm=sum will almost always result in the sample having the wrong total cross section -- please double check that event_norm=average is set in the param_card.dat.")
    for line in logContent:
        if "We need to recalculate the branching fractions" in line:
            # particles are listed comma-separated after the last "for"
            br_particles=[p.strip() for p in line.split('for')[-1].split(',')]
            bad_br_particles=[p for p in br_particles if p in ['t','t~','w+','w-','z','h']]
            if len(bad_br_particles)>0:
                logwarn("WARNING:","MadWidth is used to calculate the branching ratios of {}. This is only LO accurate. For more accurate BRs, please set them explictly in the param_card.dat.".format(",".join(bad_br_particles)))
    if not errors:
        loggood("INFO: MadGraph checks:","Passed")
366

367
368
369
370
371
372
373
374
375
376
# Powheg checks
def powhegChecks(logFile):
    """Run the Powheg specific checks and print the findings.

    When the log reported that integration grid files were found locally,
    a gridpack tarball must be present next to the log file.

    Args:
        logFile: path of the log file; its directory is searched for the
            gridpack tarball.
    """
    errors = False
    if generatorDict['Integration grid files found locally. Event generation shall continue']:
        # A bare file name has an empty dirname; fall back to the current
        # directory so the search does not end up in the filesystem root.
        logDir = os.path.dirname(logFile) or '.'
        if not glob.glob(os.path.join(logDir, "mc_*TeV.*.GRID.tar.gz")):
            logerr("ERROR:",f"Gridpack was used but no mc_*TeV.*.GRID.tar.gz file present in {logDir}")
            errors = True
    if not errors:
        loggood("INFO: Powheg checks:","Passed")

377

378
# Main function
379
380
381
def main():
    """logParser.py script for parsing log.generate files to check MC production settings and output
     - Written by Josh McFayden <mcfayden@cern.ch> Nov 2016 """
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
382
    
383
    global nEventsRequested, LHEonly, EVNTtoEVNT
384

385
386
387
388
389
    if opts.INPUT_FILE=="-":
        parser.print_help()
        return 
    
    # open and read log file
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
390
391
    logFile=open(opts.INPUT_FILE,"r",errors="replace")
    logContent=logFile.readlines()
392

393
    # check each line
394
    for line in logContent:
395
396
        checkLine(line,'Py:Athena',JOsDict,'INFO')
        checkLine(line,'MetaData',metaDataDict,'=')
397
        checkLine(line,'Py:Gen_tf',generateTfDict,'=')
398
        checkLine(line,'Py:Generate_ab',generateTfDict,'=')
399
400
401
402
403
404
        checkLine(line,'Py:PerfMonSvc',perfMonDict,':')
        checkLine(line,'PMonSD',perfMonDict,'---')
        checkLine(line,'TestHepMC',testHepMCDict,'=')
        checkLine(line,'Py:EvgenFilterSeq',evgenFilterSeqDict,'=')
        checkLine(line,'CountHepMC',countHepMCDict,'=')
        checkLine(line,'SimTimeEstimate',simTimeEstimateDict,'|')
405
406
        checkLine(line,'Py:MadGraphUtils',generatorDict,'=')
        checkLine(line,'Py:MadGraphSysUtils',generatorDict,'WARNING !!!')
407
        checkLine(line,'Py:PowhegControl',generatorDict,'INFO')
408
        checkLine(line,'Py:newP8util',generatorDict,'WARNING')
409
410
        # This is a slight abuse (extract inputGeneratorFile printed from H7 interface and put it into transform dictionary)
        checkLine(line,'Py:Herwig7_i/Herwig7_LHEF.py',generateTfDict,'inputGeneratorFile')
411
412
        # Again an abuse to extract number of events for MadGraph which are printed like "05:14:02      Nb of events :  20000"
        checkLine(line,'Nb of events',generatorDict,':')
413
        # Get if process is multi-core - no line identifier here, so hack this too "ATHENA_PROC_NUMBER set to  8"
414
        checkLine(line,'ATHENA_PROC_NUMBER',JOsDict,'set to')
415
        
416
417
418
    ### Special generators that are to be excluded from certain tests
    specialGenerators=["Superchic", "Starlight", "ParticleGun"]
    
419
420
421
    ### Check if it's a LHE-only jO
    if len(JOsDict["including file \"EvgenJobTransforms/LHEonly.py\""]): LHEonly=True
    else: LHEonly=False
422
423
424
425
426
    
    ### Check if it's an EVNT->EVNT job
    if len(JOsDict["including file \"runargs.afterburn.py\""]): EVNTtoEVNT=True
    else: EVNTtoEVNT=False
    
427
    ### jO checks
428
    JOsErrors=[]
429
430
431
432
    print("")
    print("---------------------")
    print("jobOptions and release:")
    print("---------------------")
433

434
435
    # Checking jobOptions
    JOsList=getJOsList()
436
437
438
439
440
    if not len(JOsList):
        JOsErrors.append("including file \""+MCXX)
    else:
        if not len(JOsDict["including file \""+MCXX]):
            JOsErrors.append("including file \""+MCXX)
441
    loginfo('- LHEonly = ',f'{LHEonly}')
442
    loginfo('- EVNT to EVNT = ',f'{EVNTtoEVNT}')
443
   
444
445
446
    # Extract generator name
    generatorName=metaDataDict['generatorName ='][0]

447
    # Checking release
448
449
450
    if not len(JOsDict['using release']):
        JOsErrors.append(JOsDict['using release'])
    else:
451
        tmp=JOsDict['using release'][0].replace('using release','').strip().split()[0]
452
        val=tmp.replace('[','').replace(']','')
453
        flavour=val.split('-')[0]
454
        release=val.split('-')[1]
455
        blacklisted=checkBlackList(flavour,release,generatorName,location=location)
456
        purplelisted, pl_reason=checkPurpleList(flavour,release,generatorName,location=location)
457
458
        # check that release is AthGeneration
        if flavour != "AthGeneration":
459
            logerr('- using release = ',f"{val} <-- ERROR: AthGeneration should be used instead of {flavour}")
460
        else:
461
462
            # check blacklist
            if blacklisted:
463
                logerr('- using release = ', f"{val} <-- ERROR: {val} is blacklisted")
464
            else:
465
                loggood('- using release = ', f"{val}")
466
467
468
    
    # Check if the job is multicore
    if len(JOsDict['ATHENA_PROC_NUMBER']):
469
470
471
472
473
        releaseNumber=int(release.split(".")[0])*10000+int(release.split(".")[1])*100+int(release.split(".")[2])
        if releaseNumber < 210660:
            logerr('- ATHENA_PROC_NUMBER = ',f"{JOsDict['ATHENA_PROC_NUMBER'][0].split()[0]} <-- ERROR: cannot use multi-core mode with rel < 21.6.60")
        else:
            loginfo('- ATHENA_PROC_NUMBER = ', f"{JOsDict['ATHENA_PROC_NUMBER'][0].split()[0]}")
474
475
    
    if len(JOsErrors):
476
477
        print("---------------------")
        print("MISSING JOs:")
478
479
        for i in JOsErrors:
            if i == "including file \""+MCXX:
480
                logerr("",f"ERROR: jO not found! (log.generate should contain lines like: including file \"{MCXX}*.py\")")
481
            else:
482
                logwarn("","WARNING: %s is missing!"%i)
483
484
    
    
485
    ### Generate transform checks
486
    generateErrors=[]
487
488
    print("")
    print("---------------------")
489
    print("Generate transform params:")
490
    print("---------------------")
491
492
493
494

    # add default energy - for backwards compatibility otherwise logParser would crash - to fix
    generateTfDict["ecmEnergy"].append("13000")

495
    for key in list(generateTfDict.keys()):
496
        optionalTests=['inputGeneratorFile', 'inputEVNT_PreFile']
497
        val=generateTfDict[key]
498
        if not len(val):
499
            if key not in optionalTests:
500
                generateErrors.append(key)
501
        else:
502
            if key == 'nEventsPerJob':
503
504
                # Allow to overwrite nEventsPerJob from jO file if specified (or present)
                if opts.JOFILE:
505
506
507
                    with open(opts.JOFILE, 'r') as jof:
                        jOContent = jof.read()
                    nEventsPerJob=jo_utils._read_param_from_jo(jOContent, ['evgenConfig.nEventsPerJob'])
508
509
510
511
                else:
                    # No jO file specified, still try to overwrite from JO in same dir
                    jOFile=os.path.join(location,os.path.dirname(opts.INPUT_FILE),JOsList[0])
                    if os.path.exists(jOFile):
512
513
514
                        with open(jOFile, 'r') as jof:
                            jOContent = jof.read()
                        nEventsPerJob=jo_utils._read_param_from_jo(jOContent, ['evgenConfig.nEventsPerJob'])
515
516
                    else:
                        nEventsPerJob=str(val[0]).split('#')[0].strip()
517
                if not nEventsPerJob:
518
519
                    logwarn("WARNING:", f"evgenConfig.nEventsPerJob is not defined in the jO. Will set to default: 10000")
                    nEventsPerJob=10000
520
                val=nEventsPerJob
521
                generateTfDict['nEventsPerJob'] = val
522
            elif key == 'transform':
523
                val=val[0]
524
                releaseNumber=int(release.split(".")[0])*10000+int(release.split(".")[1])*100+int(release.split(".")[2])
525
                if (val != 'Gen_tf' and val != 'Gen_tf_txt') and releaseNumber > 210610:
526
                    logerr(f"- {key} = ",f"{val} <- ERROR: tranform = {val} and release is {release}. Please use Gen_tf or Gen_tf_txt!")
527
                    continue
528
529
530
            elif key == 'evgenkeywords' and val[0] != 'found':
                logwarn(f"- {key} = ",f"{val[0]} <- WARNING: Keyword check has not been performed. Please check that the keywords used in the jobOption are in the allowed list of keywords: https://gitlab.cern.ch/atlas/athena/-/blob/21.6/Generators/EvgenJobTransforms/share/file/evgenkeywords.txt")
                continue
531
            elif key == 'inputFilesPerJob':
532
533
                val=val[0]
                # Hard limit from ADC is 1000 files
534
                if int(val) > 100:
535
                    logerr(f"- {key} = ",f"{val} <- ERROR: Need to use less than 100.")
536
                    continue
537
            else:
538
539
                val=val[0]
            loginfo(f"- {key} = ",f"{val}")
540
            
541
542
543
544
    # Check if input files are used that inputFilesPerJob matches the number of input files
    if generateTfDict['inputGeneratorFile'] and len(generateTfDict['inputGeneratorFile'][0].split(',')) != int(generateTfDict['inputFilesPerJob'][0]):
        logerr("", f"ERROR: {len(generateTfDict['inputGeneratorFile'][0].split(','))} input files used while inputFilesPerJob={generateTfDict['inputFilesPerJob'][0]}")

545
    if len(generateErrors):
546
547
        print("---------------------")
        print("MISSING Generate params:")
548
549
550
        for i in generateErrors:
            logerr("","ERROR: %s is missing!"%i)
            
551
    # Number of requested output events
552
    nEventsRequested=int(generateTfDict["Requested output events"][0])
553
    
554
    ### Metadata checks
555
    metaDataErrors=[]
556
557
558
559
    print("")
    print("---------------------")
    print("Metadata:")
    print("---------------------")
560
561
562
563
564
565
566
567
568
569
570
571
    for key in metaDataDict:
        name=key.replace("=","").strip()
        val=metaDataDict[key]
        if not len(val):
            metaDataErrors.append(name)
        else:
            if name=="contactPhysicist":
                if '@' in "".join(val):
                    loggood( '- '+name+' = ',"".join(val))
                else:
                    logerr( '- '+name+' = ',"".join(val)+"  <-- ERROR: No email found")
                continue
572
            elif name=="cross-section (nb)":
573
                if float(val[0]) < 0:
574
                    logwarn( '- '+name+' = ',"".join(val)+"  <-- WARNING: Cross-section is negative")
575
                    continue
576
577
            loginfo( '- '+name+' = ',"".join(val))
    
578
    # Check for negative weight fraction
579
580
581
582
    if len(metaDataDict["sumOfPosWeights ="]) and len(metaDataDict["sumOfNegWeights ="]):
        ratio = float(metaDataDict["sumOfNegWeights ="][0])*1.0/(float(metaDataDict["sumOfPosWeights ="][0]) + float(metaDataDict["sumOfNegWeights ="][0]))
        if ratio>0.15:
            logwarn( '- sumOfNegWeights/(sumOfPosWeights+sumOfNegWeights) = ',str(ratio)+"  <-- WARNING: more than 15% of the weights are negative")
583
    
584
    if len(metaDataErrors):
585
586
        print("---------------------")
        print("MISSING Metadata:")
587
        for i in metaDataErrors:
588
            if i=="weights" or i=="genFilterNames" or i=="generator" or i=="PDF" or i=="sumOfNegWeights" or i=="sumOfPosWeights" \
589
               or ((i=="cross-section (nb)" or i=="generatorTune") and (any(gen in generatorName for gen in specialGenerators)))\
590
591
               or i=="Number of input LHE events"\
               or (i=="generatorTune" and LHEonly)\
592
               or (EVNTtoEVNT and (i=="generatorTune" or i=="cross-section (nb)" or i=="randomSeed")):
593
594
595
596
                loginfo("INFO:","%s is missing"%i)
            else:
                logerr("","ERROR: %s is missing!"%i)
            
597
598
599
600
601
602
    ### Generator specific tests
    print("")
    print("-------------------------")
    print(f"Generator specific tests: {generatorName}")
    print("-------------------------")
    if "Sherpa" in generatorName:
603
        sherpaChecks(logContent)
604
    if "Pythia8" in generatorName:
605
        pythia8Checks(logContent,generatorName,pl_reason)
606
    if "Herwig7" in generatorName:
607
        herwig7Checks(logContent,generatorName)
608
    if "MadGraph" in generatorName or "aMcAtNlo" in generatorName:
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
609
        madgraphChecks(opts.INPUT_FILE, logContent)
610
    if "Powheg" in generatorName:
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
611
        powhegChecks(opts.INPUT_FILE)
612

613
    ###  Event tests
614
615
    testErrors=[]
    filt_eff=1.0
616
    CountHepMC=0
617
618
619
620
    print("")
    print("---------------------")
    print("Event tests:")
    print("---------------------")
621
622
623
624
625
626
627
628
629
    for dictkey in testDict:
        for key in testDict[dictkey]:
            name=key
            val=testDict[dictkey][key]
            if not len(val):
                testErrors.append("%s %s"%(dictkey,name))
            else:
                #Check final Nevents processed
                if dictkey=="CountHepMC":
630
                    nEventsAllowedInProduction = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
631
                    CountHepMC=int(val[0])
632
633
634
635
636
637
638
                    # For LHE only runs, CountHepMC will always be 1 (only 1 event showered) so we set this equal to the number of events requested
                    # which represents the number of events written in the LHE
                    if LHEonly:
                        CountHepMC=nEventsRequested
                        continue # We also don't want to perform the following checks
                    # If CountHepMC is equal to nEventsPerJob it means the production job and the test job have equal number of events
                    # otherwise the production job will be run with different number of events than the test job
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
639
                    tmp=CountHepMC if CountHepMC == int(nEventsPerJob) else int(nEventsPerJob)
640
641
                    if not (tmp in nEventsAllowedInProduction or tmp % 10000 == 0):
                        logerr( '- '+dictkey+" "+name+' = ', f"{tmp}  <-- ERROR: Not an acceptable number of events for production (1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000) or multiple of 10k")
642
                    elif CountHepMC != nEventsRequested:
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
643
                        logerr( '- '+dictkey+" "+name+' = ', f"{val[0]}  <-- ERROR: This is not equal to Requested output events={nEventsRequested}")
644
                    else:
645
646
647
648
649
650
651
652
653
654
655
656
                        loggood( '- '+dictkey+" "+name+' = ',"".join(val))
                    continue
                if dictkey=="TestHepMC" and name=="Efficiency":
                    if float(val[0].replace('%',''))<100. and float(val[0].replace('%',''))>=98.:
                        logwarn( '- '+dictkey+" "+name+' = ',"".join(val))
                    elif float(val[0].replace('%',''))<100.:
                        logerr( '- '+dictkey+" "+name+' = ',"".join(val))
                    else:
                        loggood( '- '+dictkey+" "+name+' = ',"".join(val))
                    continue
                loginfo( '- '+dictkey+" "+name+' = ',"".join(val))
    
657
658
    # Check for enough LHE events
    if len(metaDataDict["Number of input LHE events  ="]):
659
660
661
662
663
664
        NLHE = int(metaDataDict["Number of input LHE events  ="][0])
        filtEff = float(evgenFilterSeqDict["Filter Efficiency"][0].split()[0])
        Nevents = float(generateTfDict["Requested output events"][0])
        target_4sigma = (Nevents+4*(Nevents**0.5))/filtEff
        target_10perc = 1.1*Nevents/filtEff
        if NLHE >= int(target_4sigma) or NLHE >= int(target_10perc):
665
            loggood("- Number of input LHE events:",f"{NLHE}")
666
667
        else:
            logerr("- Number of input LHE events:",f"{NLHE} <-- Needs to be higher than {int(target_4sigma)}")
668

669
    if len(testErrors):
670
671
        print("---------------------")
        print("Failed tests:")
672
673
        for i in testErrors:
            if i =="SimTimeEstimate RUN INFORMATION":
674
                if EVNTtoEVNT or LHEonly: continue
675
                else: logwarn("","WARNING: %s is missing!"%i)
676
            else:
677
678
679
                # TestHepMC is not needed for special generators or when running LHE only or EVNT to EVNT
                if ("TestHepMC" in i) and ((any(gen in generatorName for gen in specialGenerators)) or LHEonly or EVNTtoEVNT):
                    loginfo("INFO:",f"{i} is missing")
680
681
                else:
                    logerr("","ERROR: %s is missing!"%i)
682
683


684
    ### Performance tests
685
686
687
688
689
690
691
692
693
694
695
696
    cpuPerJob=0.0
    perfMonErrors=[]
    print("")
    print("---------------------")
    print("Performance metrics:")
    print("---------------------")
    for key in perfMonDict:
        name=key
        val=perfMonDict[key]
        if not len(val):
            perfMonErrors.append(name)
        else:
Frank Siegert's avatar
Frank Siegert committed
697
            if key == 'snapshot_post_fin' and not opts.SKIPCPU:
698
699
700
701
702
                name = 'CPU'
                tmp = 0.
                tmp=float(val[0].split()[3])
                if len(perfMonDict['jobcfg_walltime']):
                    tmp+=float(perfMonDict['jobcfg_walltime'][0].split()[1].split('=')[1])
703
704
705
706
707
708
709
710
711
712
713
                cpuPerJob=tmp/(1000.*60.*60.)
                # Calculate timing and extrapolate if the test run was run with fewer events
                if CountHepMC != int(nEventsPerJob):
                    print(f"- actual CPU ({CountHepMC} events) = {cpuPerJob:.2f} hrs")
                    cpuPerJob=float(nEventsPerJob)*cpuPerJob/float(CountHepMC)
                    loginfo(f"- CPU extrapolated to {nEventsPerJob} events =", f"{cpuPerJob:.1f} hrs")
                if cpuPerJob > 18.:
                    logerr( f"- {name} = ",f"{cpuPerJob:.2f} hrs  <-- ERROR: Too high CPU time - should be between 6-12h. Adjust nEventsPerJob!")
                elif cpuPerJob >= 6. and cpuPerJob <= 12.:
                    loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
                elif cpuPerJob < 1.:
714
                    if ((CountHepMC == int(nEventsPerJob) and CountHepMC < 10000) or
715
                        (CountHepMC != int(nEventsPerJob) and int(nEventsPerJob) < 10000)) and not (LHEonly or EVNTtoEVNT):
716
                        logerr( f"- {name} = ",f"{cpuPerJob:.2f} hrs <-- ERROR: Too low CPU time - should be between 6-12h. Adjust nEventsPerJob!")
717
                    else:
718
                        loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
719
                else:
Spyros Argyropoulos's avatar
Spyros Argyropoulos committed
720
721
                    if ((CountHepMC == int(nEventsPerJob) and CountHepMC < 10000) or
                        (CountHepMC != int(nEventsPerJob) and int(nEventsPerJob) < 10000)):
722
                        logwarn( f"- {name} = ",f"{cpuPerJob:.2f} hrs  <-- WARNING: CPU time not optimal - should be between 6-12h. Adjust nEventsPerJob!")
723
                    else:
724
                        loggood( f"- {name} = ",f"{cpuPerJob:.2f} hrs")
725
                # Also print timing information for CI - CI runs max(1,0.01*nEventsPerJob)
726
                CICPU=max(1,0.01*float(nEventsPerJob))*cpuPerJob/float(nEventsPerJob)
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
                print(f"- estimated CPU for CI job = {CICPU:.2f} hrs")
                
            if key == 'last -evt vmem':
                name = 'Virtual memory'
                tmp=float(val[0].split()[0])
                if tmp > 4000 and tmp < 8000:
                    logwarn( '- '+name+' = ',"".join(val)+"  <-- WARNING: High memory usage - alert MC production team")
                elif tmp > 8000:
                    logerr( '- '+name+' = ',"".join(val)+"  <-- ERROR: Too high memory usage")
                else:
                    loggood( '- '+name+' = ',"".join(val))
    
    
    if len(perfMonErrors):
        print("---------------------")
        print("MISSING Performance metric:")
        for i in perfMonErrors:
744
            logerr("ERROR:", f"{i} is missing!")
745
            
746
747
    
    # Print total number of Errors/Warnings
748
749
750
751
    print("")
    print("---------------------")
    print(" Summary:")
    print("---------------------")
752
753
754
755
    if (LogCounts.Errors == 0):
        if (LogCounts.Warnings == 0):
            loggood("Errors : "+str(LogCounts.Errors)+" , Warnings : "+str(LogCounts.Warnings)," -> OK for production")
        else:
756
	        logwarn("Errors : "+str(LogCounts.Errors)+" , Warnings : "+str(LogCounts.Warnings)," -> Some warnings encountered, check that these are ok before submitting production!")
757
758
759
760
761
    else:
        logerr("Errors : "+str(LogCounts.Errors)+" , Warnings : "+str(LogCounts.Warnings)," -> Errors encountered! Not ready for production!")  
        
    return 
    
762
    
763
764
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
765
766