skel.GENtoEVGEN.py 39.4 KB
Newer Older
1
#  Copyright (C) 2002-2021 CERN for the benefit of the ATLAS collaboration
2
#
3
"""Functionality core of the Gen_tf transform"""
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

##==============================================================
## Basic configuration
##==============================================================

## Create sequences for generators, clean-up algs, filters and analyses
## and import standard framework objects with standard local scope names
import ast
import os, re, string, subprocess
import AthenaCommon.AlgSequence as acas
import AthenaCommon.AppMgr as acam
from AthenaCommon.AthenaCommonFlags import jobproperties
theApp = acam.theApp
acam.athMasterSeq += acas.AlgSequence("EvgenGenSeq")
genSeq = acam.athMasterSeq.EvgenGenSeq
acam.athMasterSeq += acas.AlgSequence("EvgenFixSeq")
fixSeq = acam.athMasterSeq.EvgenFixSeq
acam.athMasterSeq += acas.AlgSequence("EvgenPreFilterSeq")
prefiltSeq = acam.athMasterSeq.EvgenPreFilterSeq
acam.athFilterSeq += acas.AlgSequence("EvgenTestSeq")
testSeq = acam.athFilterSeq.EvgenTestSeq
Danning Liu's avatar
Danning Liu committed
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
## NOTE: LogicalExpressionFilter is an algorithm, not a sequence
from EvgenProdTools.LogicalExpressionFilter import LogicalExpressionFilter
acam.athFilterSeq += LogicalExpressionFilter("EvgenFilterSeq")
filtSeq = acam.athFilterSeq.EvgenFilterSeq
topSeq = acas.AlgSequence()
anaSeq = topSeq
topSeq += acas.AlgSequence("EvgenPostSeq")
postSeq = topSeq.EvgenPostSeq
#topAlg = topSeq #< alias commented out for now, so that accidental use throws an error


##==============================================================
## Configure standard Athena services
##==============================================================

## Special setup for event generation
include("AthenaCommon/Atlas_Gen.UnixStandardJob.py")
include("PartPropSvc/PartPropSvc.py")

## Run performance monitoring (memory logging)
from PerfMonComps.PerfMonFlags import jobproperties as perfmonjp
perfmonjp.PerfMonFlags.doMonitoring = True
perfmonjp.PerfMonFlags.doSemiDetailedMonitoring = True

## Random number services
from AthenaServices.AthenaServicesConf import AtRndmGenSvc, AtRanluxGenSvc
svcMgr += AtRndmGenSvc()
svcMgr += AtRanluxGenSvc()

## Jobs should stop if an include fails.
jobproperties.AthenaCommonFlags.AllowIgnoreConfigError = False

## Compatibility with jets
from RecExConfig.RecConfFlags import jobproperties
jobproperties.RecConfFlags.AllowBackNavigation = True

## Set up a standard logger
from AthenaCommon.Logging import logging
64
evgenLog = logging.getLogger('Gen_tf')
65
66
67
68
69
70
71
72
73

##==============================================================
## Run arg handling
##==============================================================

## Announce arg checking
evgenLog.debug("****************** CHECKING EVENT GENERATION ARGS *****************")
evgenLog.debug(str(runArgs))

74
75
76
77
78
79
80
if hasattr(runArgs, "runNumber"):
   evgenLog.warning("##########################################################################" )
   evgenLog.warning("runNumber - no longer a valid argument, do not use it ! " )
   evgenLog.warning("##########################################################################")

if hasattr(runArgs, "inputGenConfFile"):
   raise RuntimeError("inputGenConfFile is invalid !! Gridpacks and config. files/links to be put into DSID directory ")
81
82

if hasattr(runArgs, "inputGeneratorFile"):
83
   evgenLog.info("inputGeneratorFile = " + runArgs.inputGeneratorFile)
84
 
85
if hasattr(runArgs, "outputYODAFile"):
86
    evgenLog.info("specified outputYODAFile = " + runArgs.outputYODAFile)
87

88
89
90
## Ensure that an output name has been given
# TODO: Allow generation without writing an output file (if outputEVNTFile is None)?
if not hasattr(runArgs, "outputEVNTFile") and not hasattr(runArgs, "outputEVNT_PreFile"):
91
92
93
94
95
    if hasattr(runArgs, "outputYODAFile"):
        evgenLog.info("No outputEVNTFile specified but outputYODAFile is used")
        evgenLog.info("Will run GENtoEVGEN without saving the output EVNT file, asuming a valid outputYODAFile will be produced")
    else:
        raise RuntimeError("No output evgen EVNT or EVNT_Pre file provided.")
96
97
98
99

## Ensure that mandatory args have been supplied (complain before processing the includes)
if not hasattr(runArgs, "ecmEnergy"):
    raise RuntimeError("No center of mass energy provided.")
100
101
102
else:
    evgenLog.info(' ecmEnergy = ' + str(runArgs.ecmEnergy) )

103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
if not hasattr(runArgs, "randomSeed"):
    raise RuntimeError("No random seed provided.")
#if not hasattr(runArgs, "runNumber"):
#    raise RuntimeError("No run number provided.")
    # TODO: or guess it from the JO name??
if not hasattr(runArgs, "firstEvent"):
    raise RuntimeError("No first number provided.")


##==============================================================
## Configure standard Athena and evgen services
##==============================================================

## Announce start of job configuration
evgenLog.debug("****************** CONFIGURING EVENT GENERATION *****************")

## Functions for operating on generator names
## NOTE: evgenConfig, topSeq, svcMgr, theApp, etc. should NOT be explicitly re-imported in JOs
from EvgenJobTransforms.EvgenConfig import evgenConfig
from EvgenJobTransforms.EvgenConfig import gens_known, gens_lhef, gen_sortkey, gens_testhepmc, gens_notune, gen_require_steering

## Fix non-standard event features
from EvgenProdTools.EvgenProdToolsConf import FixHepMC
if not hasattr(fixSeq, "FixHepMC"):
    fixSeq += FixHepMC()

## Sanity check the event record (not appropriate for all generators)
from EvgenProdTools.EvgenProdToolsConf import TestHepMC
testSeq += TestHepMC(CmEnergy=runArgs.ecmEnergy*Units.GeV)
if not hasattr(svcMgr, 'THistSvc'):
    from GaudiSvc.GaudiSvcConf import THistSvc
    svcMgr += THistSvc()
svcMgr.THistSvc.Output = ["TestHepMCname DATAFILE='TestHepMC.root' OPT='RECREATE'"]

## Copy the event weight from HepMC to the Athena EventInfo class
# TODO: Rewrite in Python?
from EvgenProdTools.EvgenProdToolsConf import CopyEventWeight
if not hasattr(postSeq, "CopyEventWeight"):
    postSeq += CopyEventWeight()

## Configure the event counting (AFTER all filters)
# TODO: Rewrite in Python?
from EvgenProdTools.EvgenProdToolsConf import CountHepMC
svcMgr.EventSelector.FirstEvent = runArgs.firstEvent
theApp.EvtMax = -1
# This is necessary for athenaMP
#if hasattr(runArgs, "maxEvents"):
#  theApp.EvtMax = runArgs.maxEvents

if not hasattr(postSeq, "CountHepMC"):
    postSeq += CountHepMC()
154
#postSeq.CountHepMC.RequestedOutput = evgenConfig.nEventsPerJob if runArgs.maxEvents == -1 else runArgs.maxEvents
155
156

postSeq.CountHepMC.FirstEvent = runArgs.firstEvent
157
158
postSeq.CountHepMC.CorrectHepMC = True
postSeq.CountHepMC.CorrectEventID = True
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181

## Print out the contents of the first 5 events (after filtering)
# TODO: Allow configurability from command-line/exec/include args
if hasattr(runArgs, "printEvts") and runArgs.printEvts > 0:
    from TruthIO.TruthIOConf import PrintMC
    postSeq += PrintMC()
    postSeq.PrintMC.McEventKey = "GEN_EVENT"
    postSeq.PrintMC.VerboseOutput = True
    postSeq.PrintMC.PrintStyle = "Barcode"
    postSeq.PrintMC.FirstEvent = 1
    postSeq.PrintMC.LastEvent  = runArgs.printEvts

## Estimate time needed for Simulation
from EvgenProdTools.EvgenProdToolsConf import SimTimeEstimate
if not hasattr(postSeq, "SimTimeEstimate"):
    postSeq += SimTimeEstimate()

## Add Rivet_i to the job
# TODO: implement auto-setup of analyses triggered on evgenConfig.keywords (from T Balestri)
if hasattr(runArgs, "rivetAnas"):
    from Rivet_i.Rivet_iConf import Rivet_i
    anaSeq += Rivet_i()
    anaSeq.Rivet_i.Analyses = runArgs.rivetAnas
182
183
    if hasattr(runArgs, "outputYODAFile"):
        anaSeq.Rivet_i.HistoFile = runArgs.outputYODAFile
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

##==============================================================
## Pre- and main config parsing
##==============================================================

## Announce JO loading
evgenLog.debug("****************** LOADING PRE-INCLUDES AND JOB CONFIG *****************")

## Pre-include
if hasattr(runArgs, "preInclude"):
    for fragment in runArgs.preInclude:
        include(fragment)

## Pre-exec
if hasattr(runArgs, "preExec"):
    evgenLog.info("Transform pre-exec")
    for cmd in runArgs.preExec:
        evgenLog.info(cmd)
        exec(cmd)

def get_immediate_subdirectories(a_dir):
            return [name for name in os.listdir(a_dir)
                    if os.path.isdir(os.path.join(a_dir, name))]
        
# TODO: Explain!!!
def OutputTXTFile():
    outputTXTFile = None
    if hasattr(runArgs,"outputTXTFile"): outputTXTFile=runArgs.outputTXTFile
    return outputTXTFile
## Main job option include
## Only permit one jobConfig argument for evgen: does more than one _ever_ make sense?
Danning Liu's avatar
Danning Liu committed
215

216
217
218
219
220
221
#commenting out for now
#if hasattr(runArgs, "inputGeneratorFile") or hasattr(runArgs, "outputTXTFile") or os.path.isfile("events.lhe") :
#    from EvgenProdTools.EvgenProdToolsConf import TestLHE
#    if not hasattr(genSeq, "TestLHE"):
#        genSeq += TestLHE()
#        print "lheFile events.lhe exists in current directory. Will execute TestLHE checks"
Danning Liu's avatar
Danning Liu committed
222

223
if len(runArgs.jobConfig) != 1:
224
    print "INFO    runArgs.jobConfig = ", runArgs.jobConfig
225
226
227
228
229
230
231
    evgenLog.error("You must supply one and only one jobConfig file argument")
    sys.exit(1)

print "Using JOBOPTSEARCHPATH (as seen in skeleton) = '%s'" % (os.environ["JOBOPTSEARCHPATH"])
FIRST_DIR = (os.environ['JOBOPTSEARCHPATH']).split(":")[0]
#print "The first search dir = ", FIRST_DIR

Chen Peng's avatar
Chen Peng committed
232
jofiles = [f for f in os.listdir(FIRST_DIR) if (f.startswith('mc') and f.endswith('.py'))]
233
234
235
236
237
238
#print "JO file ",jofiles
## Only permit one JO file in each dsid folder
if len(jofiles) !=1:
    evgenLog.error("You must supply one and only one jobOption file in DSID directory")
    sys.exit(1)
jofile = jofiles[0]
Chen Peng's avatar
Chen Peng committed
239

240
241
242
243
244
245
246
247
248
249
joparts = (os.path.basename(jofile)).split(".")
#jo = runArgs.jobConfig[0]
#jofile = os.path.basename(jo)
#joparts = jofile.split(".")
## Perform some consistency checks if this appears to be an "official" production JO
officialJO = False
if joparts[0].startswith("mc") and all(c in string.digits for c in joparts[0][2:]):
    officialJO = True
    ## Check that there are exactly 4 name parts separated by '.': MCxx, DSID, physicsShort, .py
    if len(joparts) != 3:
250
        evgenLog.error(jofile + " name format is wrong: must be of the form mc.<physicsShort>.py: please rename.")
251
252
253
        sys.exit(1)
    ## Check the length limit on the physicsShort portion of the filename
    jo_physshortpart = joparts[1]
254
    if len(jo_physshortpart) > 50:
Zach Marshall's avatar
Zach Marshall committed
255
        evgenLog.error(jofile + " contains a physicsShort field of more than 50 characters: please rename.")
256
257
258
259
260
261
262
        sys.exit(1)
    ## There must be at least 2 physicsShort sub-parts separated by '_': gens, (tune)+PDF, and process
    jo_physshortparts = jo_physshortpart.split("_")
    if len(jo_physshortparts) < 2:
        evgenLog.error(jofile + " has too few physicsShort fields separated by '_': should contain <generators>(_<tune+PDF_if_available>)_<process>. Please rename.")
        sys.exit(1)
    ## NOTE: a further check on physicsShort consistency is done below, after fragment loading
Chen Peng's avatar
Chen Peng committed
263
264
265
266
267
268
269
    check_jofiles="/cvmfs/atlas.cern.ch/repo/sw/Generators/MC16JobOptions/scripts/check_jo_consistency.py"
    if os.path.exists(check_jofiles):
        include(check_jofiles)
        check_naming(os.path.basename(jofile))
    else:
        evgenLog.error("check_jo_consistency.py not found")
        sys.exit(1)
270
271
272
273
274
275
276
277
278
279
280

## Include the JO fragment
include(jofile)

##==============================================================
## Config validation and propagation to services, generators, etc.
##==============================================================

## Announce start of JO checking
evgenLog.debug("****************** CHECKING EVGEN CONFIGURATION *****************")

281
282
283
if hasattr(runArgs,'inputGeneratorFile') and int(evgenConfig.inputFilesPerJob) == 0 : 
   evgenConfig.inputFilesPerJob = 1 

284
285
286
## Print out options
for opt in str(evgenConfig).split(os.linesep):
    evgenLog.info(opt)
287
evgenLog.info(".transform =                  Gen_tf")
288
289
290

## Sort and check generator name / JO name consistency
##
291
292
293
294
## Check that the common fragments are not obsolete:
if evgenConfig.obsolete:
    evgenLog.error("JOs or icludes are obsolete, please check them")
    sys.exit(1)
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
## Check that the generators list is not empty:
if not evgenConfig.generators:
    evgenLog.error("No entries in evgenConfig.generators: invalid configuration, please check your JO")
    sys.exit(1)
## Check for duplicates:
if len(evgenConfig.generators) > len(set(evgenConfig.generators)):
    evgenLog.error("Duplicate entries in evgenConfig.generators: invalid configuration, please check your JO")
    sys.exit(1)
## Sort the list of generator names into standard form
gennames = sorted(evgenConfig.generators, key=gen_sortkey)
## Check that the actual generators, tune, and main PDF are consistent with the JO name
if joparts[0].startswith("MC"): #< if this is an "official" JO
    genpart = jo_physshortparts[0]
#    genpart = genpart.replace("Py8", "Pythia8").replace("MG","MadGraph").replace("Ph","Powheg").replace("Hpp",Herwigpp").replace("H7",Herwig7").replace("Sh","Sherpa").replace("Ag","Alpgen").replace("Py","Pythia").replace("EG","EvtGen").replace("PG","ParticleGun")
    expectedgenpart = ''.join(gennames)
    ## We want to record that HERWIG was used in metadata, but in the JO naming we just use a "Herwig" label
    expectedgenpart = expectedgenpart.replace("HerwigJimmy", "Herwig")
    def _norm(s):
        # TODO: add EvtGen to this normalization for MC14?
        return s.replace("Photospp", "").replace("Photos", "").replace("TauolaPP", "").replace("Tauolapp", "").replace("Tauola", "")
    def _norm2(s):
        return s.replace("Py", "Pythia").replace("MG","MadGraph").replace("Ph","Powheg").replace("Hpp","Herwigpp").replace("H7","Herwig7").replace("Sh","Sherpa").replace("Ag","Alpgen").replace("EG","EvtGen").replace("PG","ParticleGun").replace("Gva","Geneva")
        
    def _short2(s):
         return s.replace("Pythia","Py").replace("MadGraph","MG").replace("Powheg","Ph").replace("Herwigpp","Hpp").replace("Herwig7","H7").replace("Sherpa","Sh").replace("Alpgen","Ag").replace("EvtGen","EG").replace("PG","ParticleGun").replace("Geneva","Gva")
     
#    if genpart != expectedgenpart and _norm(genpart) != _norm(expectedgenpart) and _norm2(genpart) != expectedgenpart and _norm2(genpart) != _norm(expectedgenpart):
#        evgenLog.error("Expected first part of JO name to be '%s' or '%s' or '%s', but found '%s'" % (_norm(expectedgenpart), expectedgenpart, _short2(expectedgenpart), genpart))
#        evgenLog.error("gennames '%s' " %(expectedgenpart))
#        sys.exit(1)

    if genpart != _norm(expectedgenpart)  and _norm2(genpart) != _norm(expectedgenpart):
        evgenLog.error("Expected first part of JO name to be '%s' or '%s', but found '%s'" % (_norm(expectedgenpart), _norm(_short2(expectedgenpart)), genpart))
        evgenLog.error("gennames '%s' " %(expectedgenpart))
        sys.exit(1)

Ewelina Maria Lobodzinska's avatar
Ewelina Maria Lobodzinska committed
331

332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
    del _norm
    ## Check if the tune/PDF part is needed, and if so whether it's present
    if not gens_notune(gennames) and len(jo_physshortparts) < 3:
        evgenLog.error(jofile + " with generators " + expectedgenpart +
                       " has too few physicsShort fields separated by '_'." +
                       " It should contain <generators>_<tune+PDF_<process>. Please rename.")
        sys.exit(1)

## Check the "--steering=afterburn" command line argument has been set if EvtGen is in the JO name
# Dont't have access to steering flag so check it's effect on output files
if gen_require_steering(gennames):
    if hasattr(runArgs, "outputEVNTFile") and not hasattr(runArgs, "outputEVNT_PreFile"):
        raise RuntimeError("'EvtGen' found in job options name, please set '--steering=afterburn'")


347
348
## Check that the evgenConfig.nEventsPerJob setting is acceptable
## nEventsPerJob defines the production event sizes and must be sufficiently "round"    
349
350
351
352
rounding = 0
if hasattr(runArgs,'inputGeneratorFile') and ',' in runArgs.inputGeneratorFile:   multiInput = runArgs.inputGeneratorFile.count(',')+1
else:
   multiInput = 0
353
354
355
356
357
358
# check if default nEventsPerJob used 
if not evgenConfig.nEventsPerJob:
    evgenLog.info('#############################################################')
    evgenLog.info(' !!!! no nEventsPerJob set !!!  The default 10000 used. !!! ') 
    evgenLog.info('#############################################################')
else:
359
    evgenLog.info(' nEventsPerJob = ' + str(evgenConfig.nEventsPerJob)  )
360
361

if evgenConfig.minevents > 0 :
362
    raise RuntimeError("evgenConfig.minevents is obsolete and should be removed from the JOs")
363

364
if evgenConfig.nEventsPerJob < 1:
365
366
367
    raise RuntimeError("evgenConfig.nEventsPerJob must be at least 1")
elif evgenConfig.nEventsPerJob > 100000:
    raise RuntimeError("evgenConfig.nEventsPerJob can be max. 100000")
368
else:
369
370
    allowed_nEventsPerJob_lt1000 = [1, 2, 5, 10, 20, 25, 50, 100, 200, 500, 1000]
    msg = "evgenConfig.nEventsPerJob = %d: " % evgenConfig.nEventsPerJob
371
372
373
374
375
376

    if evgenConfig.nEventsPerJob >= 1000 and evgenConfig.nEventsPerJob <=10000 and (evgenConfig.nEventsPerJob % 1000 != 0 or 10000 % evgenConfig.nEventsPerJob != 0) :
           msg += "nEventsPerJob in range [1K, 10K] must be a multiple of 1K and a divisor of 10K"
           raise RuntimeError(msg)
    elif evgenConfig.nEventsPerJob > 10000  and evgenConfig.nEventsPerJob % 10000 != 0:
           msg += "nEventsPerJob >10K must be a multiple of 10K"
377
           raise RuntimeError(msg)
378
379
    elif evgenConfig.nEventsPerJob < 1000 and evgenConfig.nEventsPerJob not in allowed_nEventsPerJob_lt1000:
           msg += "nEventsPerJob in range <= 1000 must be one of %s" % allowed_nEventsPerJob_lt1000
380
           raise RuntimeError(msg)
381
    postSeq.CountHepMC.RequestedOutput = evgenConfig.nEventsPerJob if runArgs.maxEvents == -1  else runArgs.maxEvents
382
    evgenLog.info('Requested output events =  '+str(postSeq.CountHepMC.RequestedOutput))
383
384
385
386
387

## Check that the keywords are in the list of allowed words (and exit if processing an official JO)
if evgenConfig.keywords:
    ## Get the allowed keywords file from the JO package if possibe
    # TODO: Make the package name configurable
388
    kwfile = "evgenkeywords.txt"
389
    kwpath = None
390
    for p in os.environ["DATAPATH"].split(":"):
391
392
393
394
395
396
397
        kwpath = os.path.join(p, kwfile)
        if os.path.exists(kwpath):
            break
        kwpath = None
    ## Load the allowed keywords from the file
    allowed_keywords = []
    if kwpath:
398
        evgenLog.info("evgenkeywords = "+kwpath)
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
        kwf = open(kwpath, "r")
        for l in kwf:
            allowed_keywords += l.strip().lower().split()
        #allowed_keywords.sort()
        ## Check the JO keywords against the allowed ones
        evil_keywords = []
        for k in evgenConfig.keywords:
            if k.lower() not in allowed_keywords:
                evil_keywords.append(k)
        if evil_keywords:
            msg = "evgenConfig.keywords contains non-standard keywords: %s. " % ", ".join(evil_keywords)
            msg += "Please check the allowed keywords list and fix."
            evgenLog.error(msg)
            if officialJO:
                sys.exit(1)
    else:
415
        evgenLog.warning("evgenkeywords = not found ")
416
417
418
419
420

## Check that the L1 and L2 keywords pairs are in the list of allowed words pairs (and exit if processing an official JO)
if evgenConfig.categories:
    ## Get the allowed categories file from the JO package if possibe
    # TODO: Make the package name configurable
421
    lkwfile = "CategoryList.txt"
422
    lkwpath = None
423
    for p in os.environ["DATAPATH"].split(":"):
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
        lkwpath = os.path.join(p, lkwfile)
        if os.path.exists(lkwpath):
            break
        lkwpath = None
## Load the allowed categories names from the file
    allowed_cat = []
    if lkwpath:
        with open(lkwpath, 'r') as catlist:
            for line in catlist:
               allowed_list = ast.literal_eval(line)
               allowed_cat.append(allowed_list)
# Print allowed categories and categories read from the JOs
#            print "allowed categories", allowed_list
#            print "evgenConfig.categories", evgenConfig.categories

        ## Check the JO categories against the allowed ones
        bad_cat =[]
        it = iter(evgenConfig.categories)
        for x in it:
           l1 = x
           l2 = next(it)
           if "L1:" in l2 and "L2:" in l1:
               l1, l2 = l2, l1
           print "first",l1,"second",l2
           bad_cat.extend([l1, l2])
           for a1,a2 in allowed_cat:
#               print "a1 ",a1,"l1 ",l1, "a2 ",a2,"l2 ",l2
               if l1.strip().lower()==a1.strip().lower() and l2.strip().lower()==a2.strip().lower():
                 bad_cat=[]
           if bad_cat:
               msg = "evgenConfig.categories contains non-standard category: %s. " % ", ".join(bad_cat)
               msg += "Please check the allowed categories list and fix."
               evgenLog.error(msg)
               if officialJO:
                   sys.exit(1)
    else:
460
        evgenLog.warning("Could not find CategoryList.txt file %s in DATAPATH" % lkwfile)
461

462
463
464
465
466
467
468
469
470
471
472
473
if hasattr( runArgs, "outputEVNTFile") or hasattr( runArgs, "outputEVNT_PreFile"):
    ## Configure POOL streaming to the output EVNT format file
    from AthenaPoolCnvSvc.WriteAthenaPool import AthenaPoolOutputStream
    from AthenaPoolCnvSvc.AthenaPoolCnvSvcConf import AthenaPoolCnvSvc
    #from PoolSvc.PoolSvcConf import PoolSvc
    svcMgr.AthenaPoolCnvSvc.CommitInterval = 10 #< tweak for MC needs
    if hasattr(runArgs, "outputEVNTFile"):
        poolFile = runArgs.outputEVNTFile
    elif hasattr(runArgs, "outputEVNT_PreFile"):
        poolFile = runArgs.outputEVNT_PreFile
    else:
        raise RuntimeError("Output pool file, either EVNT or EVNT_Pre, is not known.")
474

475
476
    #StreamEVGEN = AthenaPoolOutputStream("StreamEVGEN", runArgs.outputEVNTFile)
    StreamEVGEN = AthenaPoolOutputStream("StreamEVGEN", poolFile)
477

478
479
480
481
482
    StreamEVGEN.ForceRead = True
    StreamEVGEN.ItemList += ["EventInfo#*", "McEventCollection#*"]
    StreamEVGEN.RequireAlgs += ["EvgenFilterSeq"]
    ## Used for pile-up (remove dynamic variables except flavour labels)
    if evgenConfig.saveJets:
483
484
485
       for jetradius in [4,6]:
          StreamEVGEN.ItemList += ["xAOD::JetContainer#AntiKt{}TruthJets".format(jetradius)]
          StreamEVGEN.ItemList += ["xAOD::JetAuxContainer#AntiKt{}TruthJetsAux.TruthLabelID.PartonTruthLabelID".format(jetradius)]
486
487
488
    if evgenConfig.savePileupTruthParticles:
       StreamEVGEN.ItemList += ["xAOD::TruthParticleContainer#TruthPileupParticles*"]
       StreamEVGEN.ItemList += ["xAOD::TruthParticleAuxContainer#TruthPileupParticlesAux.*"]
489

490
491
    # Remove any requested items from the ItemList so as not to write out
    for removeItem in evgenConfig.doNotSaveItems: StreamEVGEN.ItemList.remove( removeItem )
492

493
494
    # Allow (re-)addition to the output stream
    for addItem in evgenConfig.extraSaveItems: StreamEVGEN.ItemList += [ addItem ]
495
496

## Set the run numbers
497
498
499
dsid = os.path.basename(runArgs.jobConfig[0])
if not dsid.isdigit():
    dsid = "999999"
500
svcMgr.EventSelector.RunNumber = int(dsid)
501
502
503
504
505
506
507

#correction of run number only in afterburn mode
#if postSeq.CountHepMC.CorrectRunNumber:
#    postSeq.CountHepMC.NewRunNumber = int(dsid)
#    evgenLog.info("new run number set to "+ dsid)
    

508
509
510
511
512
513
514
515
516
517
518
519
520
521
#runArgs.runNumber
# TODO: set EventType::mc_channel_number = runArgs.runNumber

## Include information about generators in metadata
import EventInfoMgt.EventInfoMgtInit
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["mc_channel_number",str(dsid)]
#                                         str(runArgs.runNumber) ]
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["lhefGenerator", '+'.join( filter( gens_lhef, gennames ) ) ]
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["generators", '+'.join(gennames)]
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["evgenProcess", evgenConfig.process]
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["evgenTune", evgenConfig.tune]
if hasattr( evgenConfig, "hardPDF" ) : svcMgr.TagInfoMgr.ExtraTagValuePairs += ["hardPDF", evgenConfig.hardPDF]
if hasattr( evgenConfig, "softPDF" ) : svcMgr.TagInfoMgr.ExtraTagValuePairs += ["softPDF", evgenConfig.softPDF]
if hasattr( runArgs, "randomSeed") :  svcMgr.TagInfoMgr.ExtraTagValuePairs += ["randomSeed", str(runArgs.randomSeed)]
Zach Marshall's avatar
Zach Marshall committed
522
if hasattr( runArgs, "AMITag") and runArgs.AMITag != "NONE": svcMgr.TagInfoMgr.ExtraTagValuePairs += ["AMITag", runArgs.AMITag]
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543

## Handle beam info
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["beam_energy", str(int(runArgs.ecmEnergy*Units.GeV/2.0))]
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["beam_type", 'collisions']

## Propagate energy argument to the generators
# TODO: Standardise energy setting in the GenModule interface
include("EvgenJobTransforms/Generate_ecmenergies.py")

## Process random seed arg and pass to generators
include("EvgenJobTransforms/Generate_randomseeds.py")

## Add special config option (extended model info for BSM scenarios)
svcMgr.TagInfoMgr.ExtraTagValuePairs += ["specialConfiguration", evgenConfig.specialConfig ]

## Remove TestHepMC if it's inappropriate for this generator combination
# TODO: replace with direct del statements in the generator common JO fragments?
if hasattr(testSeq, "TestHepMC") and not gens_testhepmc(evgenConfig.generators):
    evgenLog.info("Removing TestHepMC sanity checker")
    del testSeq.TestHepMC

Ewelina Maria Lobodzinska's avatar
Ewelina Maria Lobodzinska committed
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
##=============================================================
## Check release number
##=============================================================
# Function to check blacklist (from Spyros'es logParser.py)
def checkBlackList(relFlavour,cache,generatorName) :
    isError = None
    with open('/cvmfs/atlas.cern.ch/repo/sw/Generators/MC16JobOptions/common/BlackList_caches.txt') as bfile:
        for line in bfile.readlines():
            if not line.strip():
                continue
            # Blacklisted release flavours
            badRelFlav=line.split(',')[0].strip()
            # Blacklisted caches
            badCache=line.split(',')[1].strip()
            # Blacklisted generators
            badGens=line.split(',')[2].strip()
            
            used_gens = ','.join(generatorName)
            #Match Generator and release type e.g. AtlasProduction, MCProd
            if relFlavour==badRelFlav and cache==badCache and re.search(badGens,used_gens) is not None:
                if badGens=="": badGens="all generators"
                isError=relFlavour+","+cache+" is blacklisted for " + badGens
                return isError
    return isError
## Announce start of JO checkingrelease nimber checking
evgenLog.debug("****************** CHECKING RELEASE IS NOT BLACKLISTED *****************")
rel = os.popen("echo $AtlasVersion").read()
rel = rel.strip()
errorBL = checkBlackList("AthGeneration",rel,gennames)
if (errorBL): 
574
575
   raise RuntimeError("This run is blacklisted for this generator, please use a different one !! "+ errorBL)  
#    evgenLog.warning("This run is blacklisted for this generator, please use a different one !! "+ errorBL )
Ewelina Maria Lobodzinska's avatar
Ewelina Maria Lobodzinska committed
576

577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632

##==============================================================
## Handling of a post-include/exec args at the end of standard configuration
##==============================================================

if hasattr(runArgs, "postInclude"):
    for fragment in runArgs.postInclude:
        include(fragment)

if hasattr(runArgs, "postExec"):
    evgenLog.info("Transform post-exec")
    for cmd in runArgs.postExec:
        evgenLog.info(cmd)
        exec(cmd)


##==============================================================
## Show the algorithm sequences and algs now that config is complete
##==============================================================
acas.dumpMasterSequence()


##==============================================================
## Input file arg handling
##==============================================================

## Announce start of input file handling
evgenLog.debug("****************** HANDLING EVGEN INPUT FILES *****************")

## Dat files
datFile = None
if "McAtNlo" in evgenConfig.generators and "Herwig" in evgenConfig.generators:
    datFile = "inparmMcAtNlo.dat"
elif "Alpgen" in evgenConfig.generators:
    datFile = "inparmAlpGen.dat"
elif "Protos" in evgenConfig.generators:
    datFile = "protos.dat"
elif "ProtosLHEF" in evgenConfig.generators:
    datFile = "protoslhef.dat"
elif "AcerMC" in evgenConfig.generators:
    datFile = "inparmAcerMC.dat"
elif "CompHep" in evgenConfig.generators:
    datFile = "inparmCompHep.dat"

## Events files
eventsFile = None
if "Alpgen" in evgenConfig.generators:
    eventsFile = "alpgen.unw_events"
elif "Protos" in evgenConfig.generators: 
    eventsFile = "protos.events"
elif "ProtosLHEF" in evgenConfig.generators:
    eventsFile = "protoslhef.events"
elif "BeamHaloGenerator" in evgenConfig.generators:
    eventsFile = "beamhalogen.events"
elif "HepMCAscii" in evgenConfig.generators:
    eventsFile = "events.hepmc"
633
634
elif "ReadMcAscii" in evgenConfig.generators:
    eventsFile = "events.hepmc"
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
elif gens_lhef(evgenConfig.generators):
    eventsFile = "events.lhe"


## Helper functions for input file handling
def find_unique_file(pattern):
    "Return a matching file, provided it is unique"
    import glob
    files = glob.glob(pattern)
    ## Check that there is exactly 1 match
    if not files:
        raise RuntimeError("No '%s' file found" % pattern)
    elif len(files) > 1:
        raise RuntimeError("More than one '%s' file found" % pattern)
    return files[0]

# This function merges a list of input LHE file to make one outputFile.  The header is taken from the first
# file, but the number of events is updated to equal the total number of events in all the input files
def merge_lhe_files(listOfFiles,outputFile):
    if(os.path.exists(outputFile)):
      print "outputFile ",outputFile," already exists.  Will rename to ",outputFile,".OLD"
      os.rename(outputFile,outputFile+".OLD")
    output = open(outputFile,'w')
    holdHeader = ""
    nevents=0
    for file in listOfFiles:
       cmd = "grep /event "+file+" | wc -l"
       nevents+=int(subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True))

    for file in listOfFiles:
       inHeader = True
       header = ""
       print "*** Starting file ",file
       for line in open(file,"r"):
##        Reading first event signals that we are done with all the header information
##        Using this approach means the script will properly handle any metadata stored
##        at the beginning of the file.  Note:  aside from the number of events, no metadata
##        is updated after the first header is read (eg the random number seed recorded will be
##        that of the first file.
          if("<event" in line and inHeader):
             inHeader = False
             if(len(holdHeader)<1):
                holdHeader = header
                output.write(header)
             output.write(line)
##        each input file ends with "</LesHouchesEvents>".  We don't want to write this out until all
##        the files have been read.  The elif below writes out all the events.
          elif(not inHeader and not ("</LesHouchesEvents>" in line)):
              output.write(line)
          if(inHeader):
##           Format for storing number of events different in MG and Powheg 
             if("nevents" in line):
##              MG5 format is "n = nevents"
                tmp = line.split("=")
                line = line.replace(tmp[0],str(nevents))
             elif("numevts" in line):
##              Powheg format is numevts n
                tmp = line.split(" ")
                nnn = str(nevents)
                line = line.replace(tmp[1],nnn)
             header+=line
    output.write("</LesHouchesEvents>\n")
    output.close()


def mk_symlink(srcfile, dstfile):
    "Make a symlink safely"
    if dstfile:
        if os.path.exists(dstfile) and not os.path.samefile(dstfile, srcfile):
            os.remove(dstfile)
        if not os.path.exists(dstfile):
            evgenLog.info("Symlinking %s to %s" % (srcfile, dstfile))
            os.symlink(srcfile, dstfile)
        else:
            evgenLog.debug("Symlinking: %s is already the same as %s" % (dstfile, srcfile))

## Find and symlink dat and event files, so they are available via the name expected by the generator
if eventsFile or datFile:
    if not hasattr(runArgs, "inputGeneratorFile") or runArgs.inputGeneratorFile == "NONE":
714
        raise RuntimeError("%s needs input file (argument inputGeneratorFile)" % runArgs.jobConfig)
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
    if evgenConfig.inputfilecheck and not re.search(evgenConfig.inputfilecheck, runArgs.inputGeneratorFile):
        raise RuntimeError("inputGeneratorFile=%s is incompatible with inputfilecheck '%s' in %s" %
                           (runArgs.inputGeneratorFile, evgenConfig.inputfilecheck, runArgs.jobConfig))
#    inputroot = os.path.basename(runArgs.inputGeneratorFile).split("._")[0]
    if datFile:
      if ".tar" in os.path.basename(runArgs.inputGeneratorFile):
        inputroot = os.path.basename(runArgs.inputGeneratorFile).split(".tar.")[0]
      else:  
        inputroot = os.path.basename(runArgs.inputGeneratorFile).split("._")[0]

      realDatFile = find_unique_file('*%s*.dat' % inputroot)
      mk_symlink(realDatFile, datFile)
    if eventsFile:
#        realEventsFile = find_unique_file('*%s.*.ev*ts' % inputroot)
#        mk_symlink(realEventsFile, eventsFile)
        myinputfiles = runArgs.inputGeneratorFile
        genInputFiles = myinputfiles.split(',')
        numberOfFiles = len(genInputFiles)
        # if there is a single file, make a symlink.  If multiple files, merge them into one output eventsFile
        if(numberOfFiles<2):
           if ".tar" in os.path.basename(runArgs.inputGeneratorFile):
             inputroot = os.path.basename(runArgs.inputGeneratorFile).split(".tar.")[0]
           else:  
             inputroot = os.path.basename(runArgs.inputGeneratorFile).split("._")[0]

           if "events" in inputroot :
               inputroot = inputroot.replace(".events","")
           realEventsFile = find_unique_file('*%s.*ev*ts' % inputroot)
           mk_symlink(realEventsFile, eventsFile)
        else:
           allFiles = []
           for file in genInputFiles:
#             Since we can have multiple files from the same task, inputroot must include more of the filename
#             to make it unique
              if ".tar" in os.path.basename(runArgs.inputGeneratorFile):
                inputroot = os.path.basename(runArgs.inputGeneratorFile).split(".tar.")[0]
              else:  
                input0 = os.path.basename(file).split("._")[0]
                input1 = (os.path.basename(file).split("._")[1]).split(".")[0]
                inputroot = input0+"._"+input1
755
              print "INFO    inputroot = ",inputroot
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
              realEventsFile = find_unique_file('*%s.*ev*ts' % inputroot)
#             The only input format where merging is permitted is LHE
              with open(realEventsFile, 'r') as f:
                 first_line = f.readline()
                 if(not ("LesHouche" in first_line)):
                    raise RuntimeError("%s is NOT a LesHouche file" % realEventsFile)
                 allFiles.append(realEventsFile)
           merge_lhe_files(allFiles,eventsFile)

else:
    if hasattr(runArgs, "inputGeneratorFile") and runArgs.inputGeneratorFile != "NONE":
        raise RuntimeError("inputGeneratorFile arg specified for %s, but generators %s do not require an input file" %
                           (runArgs.jobConfig, str(gennames)))
    if evgenConfig.inputfilecheck:
        raise RuntimeError("evgenConfig.inputfilecheck specified in %s, but generators %s do not require an input file" %
                           (runArgs.jobConfig, str(gennames)))

## Check conf files, as above but for a different command line arg, and with omission allowed
774
775
776
777
#if hasattr(runArgs, "inputGenConfFile") and runArgs.inputGenConfFile != "NONE":
#    if evgenConfig.inputconfcheck and not re.search(evgenConfig.inputconfcheck, runArgs.inputGenConfFile):
#        raise RuntimeError("inputGenConfFile=%s is incompatible with inputconfcheck (%s) in %s" %
#                           (runArgs.inputGenConfFile, evgenConfig.inputconfcheck, runArgs.jobConfig))
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795

## Do the aux-file copying
if evgenConfig.auxfiles:
    from PyJobTransformsCore.trfutil import get_files
    get_files(evgenConfig.auxfiles, keepDir=False, errorIfNotFound=True)


##==============================================================
## Write out metadata for reporting to AMI
##==============================================================

def _checkattr(attr, required=False):
    if not hasattr(evgenConfig, attr) or not getattr(evgenConfig, attr):
        msg = "evgenConfig attribute '%s' not found." % attr
        if required:
            raise RuntimeError("Required " + msg)
        return False
    return True
796
797
if hasattr(runArgs, "outputTXTFile"):
    # counting the number of events in LHE output
798
    count_ev = 0
799
    with open(eventsFile) as f:
800
801
        lines = f.read()
        count_ev = lines.count('/event')
802
    print "MetaData: %s = %s" % ("Number of produced LHE events ", count_ev)
803
804
elif hasattr(runArgs, "inputGeneratorFile"):
    # counting the number of events in LHE output
805
    count_ev = 0
806
    with open(eventsFile) as f:
807
808
        lines = f.read()
        count_ev = lines.count('/event')
809
    print "MetaData: %s = %s" % ("Number of input LHE events ", count_ev)
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835

if _checkattr("description", required=True):
    msg = evgenConfig.description
    if _checkattr("notes"):
        msg += " " + evgenConfig.notes
    print "MetaData: %s = %s" % ("physicsComment", msg)
if _checkattr("generators", required=True):
#    print "MetaData: %s = %s" % ("generatorName", "+".join(gennames))
    gennamesvers=[]
    for item in gennames:
       genera = item.upper()
       generat = genera+"VER"
       if (generat in os.environ):
           gennamesvers.append(item+"(v."+os.environ[generat]+")")
#           gennamesvers.append(item+"."+os.environ[generat])
       else:
           gennamesvers.append(item)
    print "MetaData: %s = %s" % ("generatorName", "+".join(gennamesvers))    
if _checkattr("process"):
    print "MetaData: %s = %s" % ("physicsProcess", evgenConfig.process)
if _checkattr("tune"):
    print "MetaData: %s = %s" % ("generatorTune", evgenConfig.tune)
if _checkattr("hardPDF"):
    print "MetaData: %s = %s" % ("hardPDF", evgenConfig.hardPDF)
if _checkattr("softPDF"):
    print "MetaData: %s = %s" % ("softPDF", evgenConfig.softPDF)
Chen Peng's avatar
Chen Peng committed
836
837
if _checkattr("nEventsPerJob"):
    print "MetaData: %s = %s" % ("nEventsPerJob", evgenConfig.nEventsPerJob)
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
if _checkattr("keywords"):
    print "MetaData: %s = %s" % ("keywords", ", ".join(evgenConfig.keywords).lower())      
if _checkattr("categories"):
    print "MetaData: %s = %s" % ("categories", ", ".join(evgenConfig.categories))
if _checkattr("specialConfig"):
   print "MetaData: %s = %s" % ("specialConfig", evgenConfig.specialConfig)
# TODO: Require that a contact / JO author is always set
if _checkattr("contact"):
    print "MetaData: %s = %s" % ("contactPhysicist", ", ".join(evgenConfig.contact))
#if _checkattr( "randomSeed") :
print "MetaData: %s = %s" % ("randomSeed", str(runArgs.randomSeed))
 
    
    

# Output list of generator filters used
filterNames = [alg.getType() for alg in acas.iter_algseq(filtSeq)]
excludedNames = ['AthSequencer', 'PyAthena::Alg', 'TestHepMC']
filterNames = list(set(filterNames) - set(excludedNames))
print "MetaData: %s = %s" % ("genFilterNames", ", ".join(filterNames))


##==============================================================
## Dump evgenConfig so it can be recycled in post-run actions
##==============================================================

from PyJobTransformsCore.runargs import RunArguments
runPars = RunArguments()
866
runPars.nEventsPerJob = evgenConfig.nEventsPerJob
867
868
869
870
871
872
873
874
875
runPars.maxeventsstrategy = evgenConfig.maxeventsstrategy
with open("config.pickle", 'w') as f:
    import cPickle
    cPickle.dump(runPars, f)


##==============================================================
## Get ready to run...
##==============================================================
876
evgenLog.info("****************** STARTING EVENT GENERATION *****************")