diff --git a/Tools/PyJobTransforms/cmt/requirements b/Tools/PyJobTransforms/cmt/requirements
new file mode 100755
index 0000000000000000000000000000000000000000..fb6acf2e3d0ca964ff4c4ca9ca9c80590142bb6d
--- /dev/null
+++ b/Tools/PyJobTransforms/cmt/requirements
@@ -0,0 +1,53 @@
+package PyJobTransforms
+
+use AtlasPolicy AtlasPolicy-*
+use AtlasPython AtlasPython-* External -no_auto_imports
+use AtlasPyFwdBwdPorts * External -no_auto_imports
+
+apply_pattern declare_python_modules files="*.py"
+
+# default directory in current package to look for trf's.
+macro tfs_dir '../scripts'
+
+apply_pattern declare_scripts files="${expand_files_cmd} find_*.py -s=../python"
+apply_pattern generic_declare_for_link kind=runtime files='-s=../share *.db' prefix=share name=trf
+
+# Pattern to declare python job transforms.
+# Each job transform normally has 2 components:
+#  - The python script (*_tf.py), defining the trf
+#  - The corresponding skeleton job options file (at least for athena transforms)
+# The pattern takes 2 arguments:
+#  tfs = list of job transforms, by default taken from ../scripts
+#        These scripts will be installed in InstallArea/share/bin
+#  jo = list of skeleton joboptions files belonging to the job transforms
+#       By default taken from ../share
+#       These will be installed in the InstallArea/jobOptions/<package>
+#
+pattern declare_job_transforms \
+    private ; \
+    apply_pattern generic_declare_for_link kind=tfs_exe files='-s=${tfs_dir} <tfs>' prefix=share/bin ; \
+    apply_pattern generic_declare_for_link kind=tfs_pyt files='-s=${tfs_dir} <tfs>' prefix=python/<package> ; \
+    apply_pattern generic_declare_for_link kind=tfs_jop files='-s=../share <jo>' prefix=jobOptions/<package> ; \
+    macro <package>_job_transforms "`${expand_files_cmd} -r=$(<PACKAGE>ROOT) -d=<package> -s=${tfs_dir} <tfs>`" ; \
+    apply_pattern install_python_init ; \
+    macro_append <package>_python_init_dependencies " install_tfs_pyt " ; \
+    end_private ; \
+    macro_append all_job_transforms " ${<package>_job_transforms}"
+
+# For sample/utility tfs we need to do this after the pattern def
+apply_pattern declare_job_transforms tfs='*_tf.py' jo='*.py'
+
+# RTT tests
+apply_pattern generic_declare_for_link kind=test files='-s=../test test_*.py' prefix=share/JobTransforms/test
+macro PyJobTransforms_TestConfiguration "../test/PyJobTransforms_TestConfiguration.xml"
+apply_pattern declare_runtime_extras extras="../test/PyJobTransforms_TestConfiguration.xml"
+
+# Now make sure we generate the signature file with transform arguments in it
+## DEPRECATED
+#apply_pattern generic_declare_for_link kind=json files="../share/$(package)Signatures.json" prefix=share/JobTransforms
+#
+#private
+#action makeTrfSignatures "../scripts/makeTrfSignatures.py --output ../share/$(package)Signatures.json"
+#macro_append makeTrfSignatures_dependencies " install_tfs_jop install_python_modules "
+#macro_append all_dependencies " makeTrfSignatures "
+#macro_append check_install_json_dependencies " makeTrfSignatures "
diff --git a/Tools/PyJobTransforms/python/__init__.py b/Tools/PyJobTransforms/python/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..74583d364ec2ca794156596c7254d9b234a940c6
--- /dev/null
+++ b/Tools/PyJobTransforms/python/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
diff --git a/Tools/PyJobTransforms/python/performanceDataUploader.py b/Tools/PyJobTransforms/python/performanceDataUploader.py
new file mode 100755
index 0000000000000000000000000000000000000000..060104189ad2141daea573278337489bfe485cb6
--- /dev/null
+++ b/Tools/PyJobTransforms/python/performanceDataUploader.py
@@ -0,0 +1,611 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.performanceDataUploader
+#  @brief Upload job performance stats to AMI
+
+
+__all__ = ('timelimited', 'TimeLimitExpired', 'PerformanceUploader')
+__version__ = '5 2011-07-24'
+
+from threading import Thread
+
+
+try:  # UGLY! private method __stop
+    _Thread_stop = Thread._Thread__stop
+except AttributeError:  # _stop in Python 3.0
+    _Thread_stop = Thread._stop
+
+
+class TimeLimitExpired(Exception):
+    '''Exception raised when time limit expires.
+    '''
+    pass
+
+
+def timelimited(timeout, function, *args, **kwds):
+
+    class _Timelimited(Thread):
+        _error_ = TimeLimitExpired  # assume timeout
+        _result_ = None
+
+        def run(self):
+            try:
+                self._result_ = function(*args, **kwds)
+                self._error_ = None
+            except Exception, e:  #XXX as for Python 3.0
+                self._error_ = e
+
+        def _stop(self):
+            # UGLY! force the thread to stop by (ab)using
+            # the private __stop or _stop method, but that
+            # seems to work better than these recipes
+            # <http://code.activestate.com/recipes/496960/>
+            # <http://sebulba.wikispaces.com/recipe+thread2>
+            if self.isAlive():
+                _Thread_stop(self)
+
+
+    if not hasattr(function, '__call__'):
+        raise TypeError('function not callable: %s' % repr(function))
+
+    if timeout is None:  # shortcut
+        return function(*args, **kwds)
+
+    if timeout < 0:
+        raise ValueError('timeout invalid: %s' % repr(timeout))
+
+    t = _Timelimited()
+    t.start()
+    t.join(timeout)
+
+    if t._error_ is None:
+        return t._result_
+
+    if t._error_ is TimeLimitExpired:
+        t._stop()
+        raise TimeLimitExpired('timeout %r for %s' % (timeout, repr(function)))
+    else:
+        raise t._error_
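# A minimal usage sketch for the timelimited() helper above; time.sleep stands
# in for any slow callable (illustrative only, not part of the original file):
#
#     import time
#     try:
#         r = timelimited(2.0, time.sleep, 10)    # raises TimeLimitExpired after ~2s
#     except TimeLimitExpired:
#         r = None                                # timed out; worker thread is stopped
#     r = timelimited(None, time.sleep, 0.1)      # timeout=None calls the function directly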
+
+
+import os
+import sys
+
+# cx_Oracle is referenced in exception handlers throughout this class, so it
+# must be bound at module scope; establishConnection() reports the problem if
+# the module is missing at runtime
+try:
+    import cx_Oracle
+except ImportError:
+    cx_Oracle = None
+
+
+class PerformanceUploader:
+
+    def __init__(self, production):
+        self.isConnected=0
+        self.connection=None
+        self.production=None
+
+        fn=os.getenv('TZAMIPW')
+        if fn is not None:
+            print "upload will be done using direct db access."
+            return None
+
+        print "upload will be done using AMI procedures."
+        if production:
+            self.amiclient=False
+            try:
+                from pyAMI.exceptions import AMI_Error
+                from pyAMI.client import AMIClient
+                self.amiclient = AMIClient()
+                self.production=production
+            except ImportError:
+                print "WARNING unable to import AMI from pyAMI with standard $PYTHONPATH."
+            except Exception, e:
+                print 'WARNING: ', e
+                print sys.exc_info()[0]
+                print sys.exc_info()[1]
+            except:
+                print "Unexpected error:", sys.exc_info()[0]
+
+
+    def __del__(self):
+        self.closeConnection()
+
+
+    def establishConnection(self):
+        if self.isConnected==True:
+            return True
+
+        if cx_Oracle is None:
+            print 'major fault: cx_Oracle not found.'
+            return False
+
+        fn=os.getenv('TZAMIPW')
+
+        if fn is None:
+            print 'Performance data will not be uploaded to AMI'
+            return False
+
+        with open(fn, 'r') as f:
+            for line in f:
+                try:
+                    kori = line[:line.index('/')]
+                    lozi = line[line.index('/')+1:line.index('@')]
+                    serv = line[line.index('@')+1:]
+                    self.connection = cx_Oracle.connect(kori,lozi,serv,threaded=True)
+                    print 'connection created.'
+                except cx_Oracle.DatabaseError, exc:
+                    error, = exc.args
+                    print "performanceDataUploader.py - problem in establishing connection to AMI db"
+                    print "performanceDataUploader.py Oracle-Error-Code:", error.code
+                    print "performanceDataUploader.py Oracle-Error-Message:", error.message
+                    continue
+                except:
+                    print "Unexpected error (establishConnection 1st step):", sys.exc_info()[0]
+                    continue
+
+                try:
+                    self.cursor = cx_Oracle.Cursor(self.connection)
+                    self.isConnected=True
+                    # print 'cursor created.'
+                    return True
+                except cx_Oracle.DatabaseError, exc:
+                    error, = exc.args
+                    print "performanceDataUploader.py - problem in creating cursor"
+                    print "performanceDataUploader.py Oracle-Error-Code:", error.code
+                    print "performanceDataUploader.py Oracle-Error-Message:", error.message
+                except:
+                    print "Unexpected error (establishConnection second step):", sys.exc_info()[0]
+
+                self.connection.close()
+                self.connection=None
+                print 'connection closed'
+
+        return False
+
+
+    def closeConnection(self):
+        if self.isConnected==True:
+            try:
+                self.cursor.close()
+                self.connection.close()
+                self.isConnected=False
+            except cx_Oracle.DatabaseError, exc:
+                error, = exc.args
+                print "performanceDataUploader.py - problem in closing connection to AMI db"
+                print "performanceDataUploader.py Oracle-Error-Code:", error.code
+                print "performanceDataUploader.py Oracle-Error-Message:", error.message
+            except:
+                print "Unexpected error (closeConnection):", sys.exc_info()[0]
+
+
+    def uploadDataSize(self, format, runnr, stream, tag, ne, collectionSize):
+
+        if self.production:
+            self.AMIuploadDataSize(format, runnr, stream, tag, ne, collectionSize)
+            return
+        if self.establishConnection()==False:
+            return
+        try:
+            self.cursor.callproc('CREATETAGANDSTREAM',[tag, stream])
+            self.connection.commit()
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error (createTagAndStream):", sys.exc_info()[0]
+
+        L=[]
+        for obj in collectionSize.keys():
+            L.append([ne, runnr, collectionSize[obj], format, stream, tag, obj ])
+        try:
+            insert = "INSERT INTO tempobjectsize ( events, runnumber, objectsize, datatype, stream, reprotag, objectname) VALUES (:1, :2, :3, :4, :5, :6, :7)"
+            self.cursor.prepare(insert)
+            self.cursor.executemany(None, L)
+            print 'Inserted: ' + str(self.cursor.rowcount) + ' rows.'
+
+            self.connection.commit()
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+
+        print 'performanceDataUploader.py: Collection sizes uploaded to AMI.'
+
+
+    def AMIuploadDataSize(self, format, runnr, stream, tag, ne, collectionSize):
+        L=[]
+
+        for obj in collectionSize.keys():
+            L.append([int(ne), runnr, collectionSize[obj], format, stream, tag, obj ])
+
+        try:
+            #print myValues
+            argv=[]
+            argv.append("TempDataSize")
+            argv.append("output=xml")
+            argv.append("amiAutoAddUser")
+            argv.append("tag='"+tag+"'")
+            argv.append("stream='"+stream+"'")
+            argv.append("values="+L.__str__())
+            # argv.append("AMIUser=ivukotic")
+            # argv.append("AMIPass=12345")
+            result = self.amiclient.execute(argv)
+            print result.output()
+            print 'Inserted: ' + str(len(L)) + ' rows.'
+        except Exception, msg:
+            print msg
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+
+        print 'performanceDataUploader.py: Collection sizes uploaded to AMI.'
+
+
+    def uploadAlgoPerformances(self, tag, processingStep, stream, runnr, filename):
+        if self.establishConnection()==False:
+            return
+
+        try:
+            # print 'If needed add tag and stream info'
+            self.cursor.callproc('CREATETAGANDSTREAM',[tag, stream])
+            self.connection.commit()
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error (createTagAndStream):", sys.exc_info()[0]
+
+        try:
+            # print 'If needed add processingStep'
+            self.cursor.callproc('CREATEPROCESSINGSTEP',[processingStep])
+            self.connection.commit()
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error (createProcessingStep):", sys.exc_info()[0]
+
+        print 'Sending algo data to DB.'
+        allInfo={}
+        currentAlg=''
+        events=0
+        user_max=0
+        user_total=0
+        with open(filename, 'r') as f:
+            lines=f.readlines()
+            for l in lines:
+                if len(l)<2: continue
+                l=l.strip()
+                if l[0]=='[':
+                    currentAlg=l.strip('[').rstrip(']')
+                    if currentAlg.count('Callback')>0:
+                        li=currentAlg.index('[0')
+                        ri=currentAlg.index(':Call')
+                        if li!=0 and ri!=0:
+                            currentAlg=currentAlg.replace(currentAlg[li:ri],'')
+                if l.count('cpu_user_total'): user_total=float(l.replace('cpu_user_total = ',''))
+                if l.count('cpu_user_max'): user_max=float(l.replace('cpu_user_max = ',''))
+                if l.count('cpu_user_nbr'):
+                    events=int(l.replace('cpu_user_nbr = ',''))
+                    if user_total-user_max>0 and events>0:
+                        allInfo[currentAlg]=[events, user_total-user_max]
+
+        L=[]
+        for alg in allInfo.keys():
+            # print runnr, tag, stream, processingStep, alg, allInfo[alg][0], allInfo[alg][1]
+            if len(alg)<150:
+                L.append([ allInfo[alg][1], allInfo[alg][0], runnr, processingStep, stream, tag, alg ])
+            else:
+                print 'performanceDataUploader.py PROBLEM. THIS THING HAS AN EXTREMELY LONG NAME. PLEASE CHANGE IT.', alg
+
+        insert = "INSERT INTO tempalgoperformance ( cputime, events, runnumber, algo_proc_step, stream, reprotag, algoname) VALUES (:1, :2, :3, :4, :5, :6, :7)"
+
+        try:
+            self.cursor.prepare(insert)
+            self.cursor.executemany(None, L)
+            print 'Inserted: ' + str(self.cursor.rowcount) + ' rows.'
+
+            self.connection.commit()
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+
+        print 'performanceDataUploader: Algo performance uploaded to AMI.'
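# For reference, a sketch of the PerfMon text format that uploadAlgoPerformances()
# above parses, reconstructed from the parsing logic; the algorithm name and
# numbers are invented examples:
#
#     [MyAlgorithm]
#     cpu_user_total = 123.4
#     cpu_user_max = 2.5
#     cpu_user_nbr = 1000
#
# The uploaded per-algorithm CPU figure is cpu_user_total - cpu_user_max (the
# total with the most expensive event removed), together with the event count
# cpu_user_nbr.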
+
+
+    def uploadJobPerformance(self, tag, processingStep, stream, runnr, filename):
+        if self.establishConnection()==False:
+            return
+        try:
+            print 'Sending Job data to DB.'
+
+            import PerfMonComps.PerfMonSerializer as pmon_ser
+            hdr = pmon_ser.extract_pmon_files(filename)
+            data = pmon_ser.extract_pmon_infos(hdr['infos'].name)
+            data = data['perf_summary']
+            alldata = [runnr, tag, stream, processingStep]
+            alldata.append( data['ini']['cpu'][0] )
+            alldata.append( data['evt']['cpu'][0] )
+            alldata.append( data['fin']['cpu'][0] )
+            alldata.append( data['ini']['real'][0] )
+            alldata.append( data['evt']['real'][0] )
+            alldata.append( data['fin']['real'][0] )
+            alldata.append( data['ini']['vmem'][0]*1024.0 )
+            alldata.append( data['evt']['vmem'][0]*1024.0 )
+            alldata.append( data['fin']['vmem'][0]*1024.0 )
+
+            alldata.append( float( (data['job']['statm']['VmPeak'].split(' '))[0] ) )
+            alldata.append( float( (data['job']['statm']['VmRSS'].split(' '))[0] ) )
+            alldata.append( data['job']['vmem_slope'])
+            alldata.append( data['evt']['nentries'])
+            print alldata
+
+            self.cursor.callproc('ACCEPTJOBDATA',alldata)
+            self.connection.commit()
+
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+        print 'performanceDataUploader: Job performance uploaded to AMI.'
+
+
+    def uploadPerfMonSD(self, tag, processingStep, stream, runnr, filename):
+
+        if self.production:
+            self.AMIuploadPerfMonSD( tag, processingStep, stream, runnr, filename)
+            return
+
+        try:
+            if self.establishConnection()==False:
+                return
+
+            import PerfMonComps.PMonSD as pm
+            d = pm.parse(filename)
+            if d is None:
+                print 'detailed algo information is missing from the PerfMonSD file'
+                return
+            d=d[0]
+            if 'steps_comps' not in d.keys():
+                print 'detailed algo information is missing from the PerfMonSD file'
+                return
+
+            print 'Sending PerfMonSD data to DB.'
+
+            # print 'If needed add tag and stream info'
+            self.cursor.callproc('CREATETAGANDSTREAM',[tag, stream])
+            self.connection.commit()
+
+            # print 'If needed add processingStep'
+            self.cursor.callproc('CREATEPROCESSINGSTEP',[processingStep])
+            self.connection.commit()
+
+
+            print 'Sending algo data to DB...'
+
+            L=[]
+            i=d['steps_comps']['ini']
+            e=d['steps_comps']['evt']
+            f=d['steps_comps']['fin']
+
+
+            for algoname in i.keys():
+                if len(algoname) > 150:
+                    print 'performanceDataUploader.py PROBLEM. THIS THING HAS AN EXTREMELY LONG NAME. PLEASE CHANGE IT.', algoname
+                    continue
+                cpu=0;ini=0;fin=0
+
+                ini=i[algoname]['cpu']
+                ev=i[algoname]['n']
+
+                if algoname in e.keys():
+                    cpu=e[algoname]['cpu']
+                    ev=e[algoname]['n']
+
+                if algoname in f.keys():
+                    fin=f[algoname]['cpu']
+
+                if cpu<0.01 and fin<0.01 and ini<0.01: continue
+
+                L.append([cpu, ini, fin, ev, runnr, processingStep, stream, tag, algoname])
+                #print [cpu, ini, fin, ev, runnr, processingStep, stream, tag, algoname]
+
+            # print 'algos:', len(L)
+
+            insert = "INSERT INTO tempalgoperformance ( cputime, cpuini, cpufin, events, runnumber, algo_proc_step, stream, reprotag, algoname) VALUES (:1, :2, :3, :4, :5, :6, :7, :8, :9)"
+
+            self.cursor.prepare(insert)
+            self.cursor.executemany(None, L)
+            print 'Inserted: ' + str(self.cursor.rowcount) + ' rows.'
+
+            self.connection.commit()
+
+            print 'performanceDataUploader: Algo performance uploaded to AMI.'
+
+            if 'special' not in d.keys():
+                print 'job information is missing from the PerfMonSD file'
+                return
+
+            print 'Uploading job info'
+            alldata = [runnr, tag, stream, processingStep]
+            s=d['special']
+
+            alldata.append( s['values']['cpu_bmips'] )
+            alldata.append( s['timings']['evtloop_time']['cpu'] )
+            alldata.append( s['timings']['overhead_time']['cpu'] )
+            alldata.append( s['leaks']['leakperevt_evt11to50']['malloc'] )
+            alldata.append( s['timings']['evtloop_time']['wall'] )
+            alldata.append( s['timings']['overhead_time']['wall'] )
+
+            alldata.append( s['values']['vmem_mean'] )
+            alldata.append( s['values']['jobcfg_walltime'] )
+            alldata.append( s['leaks']['leakperevt_evt51plus']['malloc'] )
+
+            alldata.append( s['values']['vmem_peak'] )
+            alldata.append( s['values']['rss_mean'] )
+            alldata.append( s['leaks']['leakperevt_evt51plus']['vmem'] )
+
+            alldata.append( s['nevts'] )
+
+            #print alldata
+
+            self.cursor.callproc('ACCEPTJOBDATA',alldata)
+            self.connection.commit()
+
+        except cx_Oracle.DatabaseError, exc:
+            error, = exc.args
+            print "performanceDataUploader.py Oracle-Error-Code:", error.code
+            print "performanceDataUploader.py Oracle-Error-Message:", error.message
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+
+        print 'performanceDataUploader: Job performance uploaded to AMI.'
+
+
+
+    def AMIuploadPerfMonSD(self, tag, processingStep, stream, runnr, filename):
+        try:
+
+            import PerfMonComps.PMonSD as pm
+            d = pm.parse(filename)
+            if d is None:
+                print 'detailed algo information is missing from the PerfMonSD file'
+                return
+            d=d[0]
+            if 'steps_comps' not in d.keys():
+                print 'detailed algo information is missing from the PerfMonSD file'
+                return
+
+            print 'Sending PerfMonSD data to AMI DB.'
+
+            L=[]
+
+            i=d['steps_comps']['ini']
+            e=d['steps_comps']['evt']
+            f=d['steps_comps']['fin']
+
+
+            for algoname in i.keys():
+                if len(algoname) > 150:
+                    print 'performanceDataUploader.py PROBLEM. THIS THING HAS AN EXTREMELY LONG NAME. PLEASE CHANGE IT.', algoname
+                    continue
+                cpu=0;ini=0;fin=0
+
+                ini=i[algoname]['cpu']
+                ev=i[algoname]['n']
+
+                if algoname in e.keys():
+                    cpu=e[algoname]['cpu']
+                    ev=e[algoname]['n']
+
+                if algoname in f.keys():
+                    fin=f[algoname]['cpu']
+
+                if cpu<0.01 and fin<0.01 and ini<0.01: continue
+
+                L.append([cpu, ini, fin, ev, runnr, processingStep, stream, tag, algoname])
+                #L.append([cpu, ini, fin, ev, runnr, 'TEST', 'TEST', 'TEST', algoname])
+                #print [cpu, ini, fin, ev, runnr, processingStep, stream, tag, algoname]
+
+            try:
+                #print myValues
+                argv=[]
+                argv.append("TempAlgoPerformance")
+                argv.append("output=xml")
+                argv.append("tag='"+tag+"'")
+                argv.append("stream='"+stream+"'")
+                argv.append("processingStepName='"+processingStep+"'")
+                argv.append("values="+L.__str__())
+                # argv.append("AMIUser=ivukotic")
+                # argv.append("AMIPass=12345")
+                result = self.amiclient.execute(argv)
+                print result.output()
+                print 'Inserted: ' + str(len(L)) + ' rows.'
+
+            except Exception, msg:
+                print msg
+            except:
+                print "Unexpected error:", sys.exc_info()[0]
+
+            print 'performanceDataUploader: Algo performance uploaded to AMI.'
+
+
+            if 'special' not in d.keys():
+                print 'job information is missing from the PerfMonSD file'
+                return
+
+            print 'Uploading job info'
+            alldata = [runnr, tag, stream, processingStep]
+            s=d['special']
+
+            alldata.append( s['values']['cpu_bmips'] )
+            alldata.append( s['timings']['evtloop_time']['cpu'] )
+            alldata.append( s['timings']['overhead_time']['cpu'] )
+            alldata.append( s['leaks']['leakperevt_evt11to50']['malloc'] )
+            alldata.append( s['timings']['evtloop_time']['wall'] )
+            alldata.append( s['timings']['overhead_time']['wall'] )
+
+            alldata.append( s['values']['vmem_mean'] )
+            alldata.append( s['values']['jobcfg_walltime'] )
+            alldata.append( s['leaks']['leakperevt_evt51plus']['malloc'] )
+
+            alldata.append( s['values']['vmem_peak'] )
+            alldata.append( s['values']['rss_mean'] )
+            alldata.append( s['leaks']['leakperevt_evt51plus']['vmem'] )
+
+            alldata.append( s['nevts'] )
+
+            #print alldata
+
+            argv=[]
+            argv.append("AcceptJobData")
+            argv.append("output=xml")
+            argv.append("alldata="+alldata.__str__())
+            # argv.append("AMIUser=ivukotic")
+            # argv.append("AMIPass=12345")
+            result = self.amiclient.execute(argv)
+            print result.output()
+            print 'performanceDataUploader: Job performance uploaded to AMI.'
+
+        except Exception, msg:
+            print 'performanceDataUploader: EXCEPTION in Algo performance uploading to AMI.',msg
+        except:
+            print "Unexpected error:", sys.exc_info()[0]
+
+
+
+
+
+
+if __name__ == '__main__':
+
+    import sys, time, threading
+
+
+    for t, s in ((2.0, 1), (1.0, 20)):  # note, 20!
+        try:
+            print 'timelimit',t,'\texec time:',s
+            r = timelimited(t, time.sleep, s)
+            print 'done in time'
+        except Exception, e:  #XXX as for Python 3.0
+            r = e
+            print 'timeout'
+
+    # check that all created threads stopped
+    for t in threading.enumerate():
+        if t.isAlive() and repr(t).startswith('<_Timelimited('):
+            print 'thread %r still alive' % t
+            break
+    else:
+        print 'all _Timelimited threads stopped'
+
+
+
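# A minimal driving sketch for PerformanceUploader; the tag/stream/run values
# are invented, and direct db access additionally requires the TZAMIPW
# environment variable to point to a credentials file:
#
#     pu = PerformanceUploader(production=False)
#     pu.uploadDataSize('AOD', 182796, 'physics_Egamma', 'f396', 1000,
#                       {'ElectronAODCollection': 12.3})
#     pu.closeConnection()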
diff --git a/Tools/PyJobTransforms/python/transform.py b/Tools/PyJobTransforms/python/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..14d874e57f779b747f295457adb6437f1b2fd2f4
--- /dev/null
+++ b/Tools/PyJobTransforms/python/transform.py
@@ -0,0 +1,634 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.transform
+#
+#  @brief Main package for new style ATLAS job transforms
+#  @details Core class for ATLAS job transforms
+#  @author atlas-comp-transforms-dev@cern.ch
+#  @version $Id: transform.py 609252 2014-07-29 16:20:33Z wbreaden $
+#
+
+__version__ = '$Revision$'
+__doc__ = 'Core class for transforms'
+
+import argparse
+import os
+import os.path
+import pprint
+import re
+import sys
+import time
+import traceback
+import unittest
+
+from xml.etree import ElementTree
+
+import logging
+msg = logging.getLogger(__name__)
+
+import PyJobTransforms.trfValidation as trfValidation
+import PyJobTransforms.trfExceptions as trfExceptions
+
+from PyJobTransforms.trfSignal import setTrfSignalHandlers, resetTrfSignalHandlers
+from PyJobTransforms.trfArgs import addStandardTrfArgs, addFileValidationArguments, addValidationArguments
+from PyJobTransforms.trfLogger import setRootLoggerLevel, stdLogLevels
+from PyJobTransforms.trfJobOptions import JobOptionsTemplate
+from PyJobTransforms.trfArgClasses import trfArgParser, argFile, argHISTFile, argument
+from PyJobTransforms.trfExitCodes import trfExit
+from PyJobTransforms.trfUtils import shQuoteStrings, listChildren, infanticide, pickledDump, JSONDump, cliToKey, convertToStr
+from PyJobTransforms.trfReports import trfJobReport, defaultFileReport
+from PyJobTransforms.trfExe import transformExecutor
+from PyJobTransforms.trfGraph import executorGraph
+
+## @class transform
+#  @brief Core transform class
+#  @note Every transform should only have one transform class instantiated
+class transform(object):
+
+    ## @brief Initialise a job transform
+    #  @param standardSignalHandlers Boolean to set signal handlers. Default @c True.
+    #  @param standardValidationArgs Boolean to set standard validation options. Default @c True.
+    #  @param trfName Name of the transform. Default is executable name with .py rstripped.
+    #  @param executor Executor list
+    def __init__(self, standardSignalHandlers = True, standardTrfArgs = True, standardValidationArgs=True,
+                 trfName = os.path.basename(sys.argv[0]).rsplit('.py', 1)[0],
+                 executor = set([transformExecutor(),]), exeArgs = None, description = ''):
+        '''Transform class initialiser'''
+        msg.debug('Welcome to new transforms')
+
+        ## @brief Get starting timestamp as early as possible
+        self._transformStart = os.times()
+
+        ## Transform _name
+        self._name = trfName
+
+        ## @note Holder for arguments this trf understands
+        #  Use @c argparse.SUPPRESS to have non-given arguments unset, rather than None
+        #  Support reading arguments from a file using the notation @c @file
+        self.parser = trfArgParser(description='Transform {0}. {1}'.format(trfName, description),
+                                   argument_default=argparse.SUPPRESS,
+                                   fromfile_prefix_chars='@')
+
+        if standardTrfArgs:
+            addStandardTrfArgs(self.parser)
+
+        if standardValidationArgs:
+            addValidationArguments(self.parser)
+            addFileValidationArguments(self.parser)
+
+
+        ## Argument dictionary for this transform
+        self._argdict = dict()
+
+        ## Data dictionary place holder (this maps data types to their argFile instances)
+        self._dataDictionary = dict()
+
+
+        # Transform executor list - initialise with an empty set
+        self._executors = set()
+        self._executorDictionary = {}
+
+        # If we were passed executors at construction time then append them to the set:
+        if executor is not None:
+            self.appendToExecutorSet(executor)
+
+        ## Transform exit code/message holders
+        self._exitCode = None
+        self._exitMsg = None
+
+        ## Report object for this transform
+        self._report = trfJobReport(parentTrf = self)
+
+        # Setup standard signal handling if asked
+        if standardSignalHandlers:
+            setTrfSignalHandlers(self._exitWithReport)
+            msg.debug('Standard signal handlers established')
+
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def exitCode(self):
+        if self._exitCode is None:
+            msg.warning('Transform exit code getter: _exitCode is unset, returning "TRF_UNKOWN"')
+            return trfExit.nameToCode('TRF_UNKOWN')
+        else:
+            return self._exitCode
+
+    @property
+    def exitMsg(self):
+        if self._exitMsg is None:
+            msg.warning('Transform exit code getter: _exitMsg is unset, returning empty string')
+            return ''
+        else:
+            return self._exitMsg
+
+    @property
+    def argdict(self):
+        return self._argdict
+
+    @property
+    def dataDictionary(self):
+        return self._dataDictionary
+
+    @property
+    def report(self):
+        return self._report
+
+    @property
+    def transformStart(self):
+        return self._transformStart
+
+    @property
+    def executors(self):
+        return self._executors
+
+    def appendToExecutorSet(self, executors):
+        # Normalise to something iterable
+        if isinstance(executors, transformExecutor):
+            executors = [executors,]
+        elif not isinstance(executors, (list, tuple, set)):
+            raise trfExceptions.TransformInternalException(trfExit.nameToCode('TRF_INTERNAL'),
+                                                           'Transform was initialised with an executor which was not a simple executor or an executor set')
+
+        # TRY TO DEPRECATE SETTING trf IN THE EXECUTOR - USE CONF!
+        # Executor book keeping: set parent link back to me for all executors
+        # Also setup a dictionary, indexed by executor name and check that name is unique
+        ## Setting conf here not working - too early to get the dataDictionary
+        for executor in executors:
+            executor.trf = self
+            if executor.name in self._executorDictionary:
+                raise trfExceptions.TransformInternalException(trfExit.nameToCode('TRF_INTERNAL'),
+                                                               'Transform has been initialised with two executors with the same name ({0}) - executor names must be unique'.format(executor.name))
+            self._executors.add(executor)
+            self._executorDictionary[executor.name] = executor
+
+
+    ## @brief Parse command line arguments for a transform
+    def parseCmdLineArgs(self, args):
+        msg.info('Transform command line was: %s' % ' '.join(shQuoteStrings(sys.argv)))
+
+        try:
+            # Use the argparse infrastructure to get the actual command line arguments
+            self._argdict=vars(self.parser.parse_args(args))
+
+            # Now look for special arguments, which expand out to other parameters
+            # Note that the pickled argdict beats AMIConfig because dict.update() will overwrite
+            # (However, we defend the real command line against updates from either source)
+            extraParameters = {}
+            # AMI configuration?
+            if 'AMIConfig' in self._argdict:
+                msg.debug('Given AMI tag configuration {0}'.format(self._argdict['AMIConfig']))
+                from PyJobTransforms.trfAMI import TagInfo
+                tag=TagInfo(self._argdict['AMIConfig'].value)
+                updateDict = {}
+                for k, v in dict(tag.trfs[0]).iteritems():
+                    # Convert to correct internal key form
+                    updateDict[cliToKey(k)] = v
+                extraParameters.update(updateDict)
+            # Pickled arguments?
+            if 'argdict' in self._argdict:
+                try:
+                    import cPickle as pickle
+                    msg.debug('Given pickled arguments in {0}'.format(self._argdict['argdict']))
+                    argfile = open(self._argdict['argdict'], 'r')
+                    extraParameters.update(pickle.load(argfile))
+                    argfile.close()
+                except Exception, e:
+                    raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), 'Error when unpickling file {0}'.format(self._argdict['argdict']))
+            # JSON arguments?
+            if 'argJSON' in self._argdict:
+                try:
+                    import json
+                    msg.debug('Given JSON encoded arguments in {0}'.format(self._argdict['argJSON']))
+                    argfile = open(self._argdict['argJSON'], 'r')
+                    jsonParams = json.load(argfile)
+                    msg.debug('Read: {0}'.format(jsonParams))
+                    extraParameters.update(convertToStr(jsonParams))
+                    argfile.close()
+                except Exception, e:
+                    raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), 'Error when deserialising JSON file {0} ({1})'.format(self._argdict['argJSON'], e))
+            # Process anything we found
+            for k,v in extraParameters.iteritems():
+                msg.debug('Found this extra argument: {0} with value: {1} ({2})'.format(k, v, type(v)))
+                if k not in self.parser._argClass:
+                    raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), 'Argument "{0}" not known (try "--help")'.format(k))
+                if k in self._argdict:
+                    msg.debug('Ignored {0}={1} as extra parameter because this argument was given on the command line.'.format(k, v))
+                    continue
+                # For callable classes we instantiate properly, otherwise we set the value for simple arguments
+                if '__call__' in dir(self.parser._argClass[k]):
+                    self._argdict[k] = self.parser._argClass[k](v)
+                else:
+                    self._argdict[k] = v
+                msg.debug('Argument {0} set to {1}'.format(k, self._argdict[k]))
+
+            # Set the key name as an argument property - useful to be able to look back at where this
+            # argument came from
+            for k, v in self._argdict.iteritems():
+                if isinstance(v, argument):
+                    v.name = k
+
+            # Now we parsed all arguments, if a pickle/json dump is requested do it here and exit
+            if 'dumpPickle' in self._argdict:
+                msg.info('Now dumping pickled version of command line to {0}'.format(self._argdict['dumpPickle']))
+                pickledDump(self._argdict)
+                sys.exit(0)
+
+            # Now we parsed all arguments, if a pickle/json dump is requested do it here and exit
+            if 'dumpJSON' in self._argdict:
+                msg.info('Now dumping JSON version of command line to {0}'.format(self._argdict['dumpJSON']))
+                JSONDump(self._argdict)
+                sys.exit(0)
+
+        except trfExceptions.TransformArgException, e:
+            msg.critical('Argument parsing failure: {0!s}'.format(e))
+            self._exitCode = e.errCode
+            self._exitMsg = e.errMsg
+            self._report.fast = True
+            self.generateReport()
+            sys.exit(self._exitCode)
+
+        self.setGlobalLogLevel()
+
+
+    ## @brief Check transform argument dictionary and set the correct root logger option
+    def setGlobalLogLevel(self):
+        if 'verbose' in self._argdict:
+            setRootLoggerLevel(stdLogLevels['DEBUG'])
+        elif 'loglevel' in self._argdict:
+            if self._argdict['loglevel'] in stdLogLevels:
+                msg.info("Loglevel option found - setting root logger level to %s" %
+                         logging.getLevelName(stdLogLevels[self._argdict['loglevel']]))
+                setRootLoggerLevel(stdLogLevels[self._argdict['loglevel']])
+            else:
+                msg.warning('Unrecognised loglevel ({0}) given - ignored'.format(self._argdict['loglevel']))
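# A sketch of how a concrete transform script is expected to drive this class;
# the executor set construction is transform-specific and elided here:
#
#     trf = transform(executor=myExecutors, description='Example transform')
#     trf.parseCmdLineArgs(sys.argv[1:])
#     trf.execute()
#     trf.generateReport()
#     sys.exit(trf.exitCode)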
+
+    ## @brief Execute transform
+    #  @details This function calls the actual transform execution class and
+    #  sets \c self.exitCode and \c self.exitMsg transform data members.
+    #  TODO: This method should be timed - try a decorator function for that
+    #  @return None.
+    def execute(self):
+        msg.debug('Entering transform execution phase')
+
+        try:
+            # Intercept a few special options here
+            if 'dumpargs' in self._argdict:
+                self.parser.dumpArgs()
+                sys.exit(0)
+
+            if 'showSteps' in self._argdict:
+                for exe in self._executors:
+                    print "Executor Step: {0} (alias {1})".format(exe.name, exe.substep)
+                sys.exit(0)
+
+            # Graph stuff!
+            msg.info('Starting to resolve execution graph')
+            self._setupGraph()
+            msg.info('Execution graph resolved')
+
+            if 'showGraph' in self._argdict:
+                print self._executorGraph
+                sys.exit(0)
+
+            # Graph stuff!
+            msg.info('Starting to trace execution path')
+            self._tracePath()
+            msg.info('Execution path found with {0} step(s): {1}'.format(len(self._executorPath),
+                                                                         ' '.join([exe['name'] for exe in self._executorPath])))
+
+            if 'showPath' in self._argdict:
+                msg.debug('Execution path list is: {0}'.format(self._executorPath))
+                # Now print it nice
+                print 'Executor path is:'
+                for node in self._executorPath:
+                    print '  {0}: {1} -> {2}'.format(node['name'], list(node['input']), list(node['output']))
+                sys.exit(0)
+
+            msg.debug('Execution path is {0}'.format(self._executorPath))
+
+            # Prepare files for execution (separate method?)
+            for dataType in [ data for data in self._executorGraph.data if 'NULL' not in data ]:
+                if dataType in self._dataDictionary:
+                    msg.debug('Data type {0} maps to existing argument {1}'.format(dataType, self._dataDictionary[dataType]))
+                else:
+                    fileName = 'tmp.' + dataType
+                    # How to pick the correct argFile class?
+                    for (prefix, suffix) in (('tmp', ''), ('output', 'File'), ('input', 'File')):
+                        stdArgName = prefix + dataType + suffix
+                        if stdArgName in self.parser._argClass:
+                            msg.debug('Matched data type {0} to argument {1}'.format(dataType, stdArgName))
+                            self._dataDictionary[dataType] = self.parser._argClass[stdArgName](fileName)
+                            self._dataDictionary[dataType].io = 'temporary'
+                            break
+                    if dataType not in self._dataDictionary:
+                        if 'HIST' in fileName:
+                            self._dataDictionary[dataType] = argHISTFile(fileName, io='temporary', type=dataType.lower())
+                        else:
+                            self._dataDictionary[dataType] = argFile(fileName, io='temporary', type=dataType.lower())
+                        msg.debug('Did not find any argument matching data type {0} - setting to plain argFile: {1}'.format(dataType, self._dataDictionary[dataType]))
+                    self._dataDictionary[dataType].name = fileName
+
+            # Now we can set the final executor configuration properly, with the final dataDictionary
+            for executor in self._executors:
+                executor.conf.setFromTransform(self)
+
+
+            self.validateInFiles()
+
+            for executionStep in self._executorPath:
+                msg.debug('Now preparing to execute {0}'.format(executionStep))
+                executor = self._executorDictionary[executionStep['name']]
+                executor.preExecute(input = executionStep['input'], output = executionStep['output'])
+                executor.execute()
+                executor.postExecute()
+                executor.validate()
+
+            self.validateOutFiles()
+
+        except trfExceptions.TransformException as e:
+            msg.critical('Transform executor raised %s: %s' % (e.__class__.__name__, e.errMsg))
+            self._exitCode = e.errCode
+            self._exitMsg = e.errMsg
+            # Try and write a job report...
+            self.generateReport(fast=True)
+            sys.exit(self._exitCode)
+
+        # As the actual executor function is not part of this class we pass the transform as an argument
+        # This means that simple executors do not require explicit subclassing
+        msg.debug('Transform executor succeeded')
+        self._exitCode = 0
+        self._exitMsg = trfExit.codeToName(self._exitCode)
+
+        # Just in case any stray processes have been left behind...
+        if ('orphanKiller' in self._argdict):
+            infanticide(message=True, listOrphans=True)
+        else:
+            infanticide(message=True)
+
+    ## @brief Setup the executor graph
+    #  @note This function might need to be called again when the number of 'substeps' is unknown
+    #  just based on the input data types - e.g., DigiMReco jobs don't know how many RDOtoESD
+    #  steps they need to run until after digitisation.
+    def _setupGraph(self):
+        # Get input/output data
+        self._inputData = list()
+        self._outputData = list()
+
+        for key, value in self._argdict.iteritems():
+            # Note specifier [A-Za-z0-9_]+? makes this match non-greedy (avoid swallowing the optional 'File' suffix)
+            m = re.match(r'(input|output|tmp)([A-Za-z0-9_]+?)(File)?$', key)
+            # N.B. Protect against taking arguments which are not type argFile
+            if m and isinstance(value, argFile):
+                if m.group(1) == 'input':
+                    self._inputData.append(m.group(2))
+                else:
+                    self._outputData.append(m.group(2))
+                self._dataDictionary[m.group(2)] = value
+
+        ## @note If we have no real data then add the pseudo datatype NULL, which allows us to manage
+        #  transforms which can run without data
+        if len(self._inputData) == 0:
+            self._inputData.append('inNULL')
+        if len(self._outputData) == 0:
+            self._outputData.append('outNULL')
+        msg.debug('Transform has this input data: {0}; output data {1}'.format(self._inputData, self._outputData))
+
+        # Now see if we have any steering - manipulate the substep inputs and outputs before we
+        # setup the graph
+        if 'steering' in self._argdict.keys():
+            msg.debug('Now applying steering to graph: {0}'.format(self._argdict['steering'].value))
+            for substep, steeringValues in self._argdict['steering'].value.iteritems():
+                foundSubstep = False
+                for executor in self._executors:
+                    if executor.name == substep or executor.substep == substep:
+                        foundSubstep = True
+                        msg.debug('Updating {0} with {1}'.format(executor.name, steeringValues))
+                        # Steering consists of tuples with (in/out, +/-, datatype)
+                        for steeringValue in steeringValues:
+                            if steeringValue[0] == 'in':
+                                startSet = executor.inData
+                            else:
+                                startSet = executor.outData
+                            origLen = len(startSet)
+                            msg.debug('Data values to be modified are: {0}'.format(startSet))
+                            if steeringValue[1] == '+':
+                                startSet.add(steeringValue[2])
+                                if len(startSet) != origLen + 1:
+                                    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
+                                                                                'Attempting to add data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype already there?'.format(steeringValue[2], executor.name, steeringValue[1], startSet))
+                            else:
+                                startSet.discard(steeringValue[2])
+                                if len(startSet) != origLen - 1:
+                                    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
+                                                                                'Attempting to remove data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype even present?'.format(steeringValue[2], executor.name, steeringValue[1], startSet))
+                            msg.debug('Updated data values to: {0}'.format(startSet))
+                if not foundSubstep:
+                    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
+                                                                'This transform has no executor/substep {0}'.format(substep))
+
+        # Setup the graph and topo sort it
+        self._executorGraph = executorGraph(self._executors, self._inputData, self._outputData)
+        self._executorGraph.doToposort()
+
+
+    ## @brief Trace the path through the executor graph
+    #  @note This function might need to be called again when the number of 'substeps' is unknown
+    #  just based on the input data types - e.g., DigiMReco jobs don't know how many RDOtoESD
+    #  steps they need to run until after digitisation.
+    def _tracePath(self):
+        self._executorGraph.findExecutionPath()
+
+        self._executorPath = self._executorGraph.execution
+        if len(self._executorPath) == 0:
+            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_SETUP'),
+                                                        'Execution path finding resulted in no substeps being executed '
+                                                        '(Did you correctly specify input data for this transform?)')
+        # Tell the first executor that they are the first
+        self._executorDictionary[self._executorPath[0]['name']].conf.firstExecutor = True
+
+    ## @brief Return the last executor which actually executed
+    #  @return Last executor which has @c _hasExecuted == @c True, or the very first executor if we didn't
+    #  even start yet
+    @property
+    def lastExecuted(self):
+        # Just make sure we have the path traced
+        if not hasattr(self, '_executorPath') or len(self._executorPath) == 0:
+            return None
+
+        lastExecutor = self._executorDictionary[self._executorPath[0]['name']]
+        for executorStep in self._executorPath[1:]:
+            if self._executorDictionary[executorStep['name']].hasExecuted:
+                lastExecutor = self._executorDictionary[executorStep['name']]
+        return lastExecutor
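# Illustration of the steering structure consumed by _setupGraph() above; the
# substep and datatype names are invented examples:
#
#     self._argdict['steering'].value == {'RAWtoESD': [('in', '-', 'RDO'),
#                                                      ('in', '+', 'RDO_FILT')]}
#
# i.e. for substep RAWtoESD, remove RDO from its inputs and add RDO_FILT.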
+
+    ## @brief Transform report generator
+    #  @param fast If True ensure that no external calls are made for file metadata (this is
+    #  used to generate reports in a hurry after a crash or a forced exit)
+    #  @param fileReport Dictionary giving the type of report to make for each type of file.
+    #  This dictionary has to have all io types as keys and valid values are:
+    #  @c None - skip this io type; @c 'full' - Provide all details; @c 'name' - only dataset and
+    #  filename will be reported on.
+    #  @param reportType Iterable with report types to generate, otherwise a sensible default
+    #  is used (~everything, plus the Tier0 report at Tier0)
+    def generateReport(self, reportType=None, fast=False, fileReport = defaultFileReport):
+        msg.debug('Transform report generator')
+
+        if 'reportType' in self._argdict:
+            if reportType is not None:
+                msg.info('Transform requested report types {0} overridden by command line to {1}'.format(reportType, self._argdict['reportType'].value))
+            reportType = self._argdict['reportType'].value
+
+        if reportType is None:
+            reportType = ['text', 'json', 'classic', 'pilotPickle']
+            # Only generate the Tier0 report at Tier0 ;-)
+            # (It causes spurious warnings for some grid jobs with background files (e.g., digitisation)
+            if 'TZHOME' in os.environ:
+                reportType.append('gpickle')
+
+        if 'reportName' in self._argdict:
+            baseName = classicName = self._argdict['reportName'].value
+        else:
+            baseName = 'jobReport'
+            classicName = 'metadata'
+
+        try:
+            # Text
+            if reportType is None or 'text' in reportType:
+                self._report.writeTxtReport(filename='{0}.txt'.format(baseName), fast=fast, fileReport=fileReport)
+            # JSON
+            if reportType is None or 'json' in reportType:
+                self._report.writeJSONReport(filename='{0}.json'.format(baseName), fast=fast, fileReport=fileReport)
+            # Classic XML
+            if reportType is None or 'classic' in reportType:
+                self._report.writeClassicXMLReport(filename='{0}.xml'.format(classicName), fast=fast)
+            # Classic gPickle
+            if reportType is None or 'gpickle' in reportType:
+                self._report.writeGPickleReport(filename='{0}.gpickle'.format(baseName), fast=fast)
+            # Pickled version of the JSON report for pilot
+            if reportType is None or 'pilotPickle' in reportType:
+                self._report.writePilotPickleReport(filename='{0}Extract.pickle'.format(baseName), fast=fast, fileReport=fileReport)
+
+        except trfExceptions.TransformTimeoutException, reportException:
+            msg.error('Received timeout when writing report ({0})'.format(reportException))
+            msg.error('Report writing is aborted - sorry. Transform will exit with TRF_METADATA_CALL_FAIL status.')
+            if ('orphanKiller' in self._argdict):
+                infanticide(message=True, listOrphans=True)
+            else:
+                infanticide(message=True)
+            sys.exit(trfExit.nameToCode('TRF_METADATA_CALL_FAIL'))
+
+        except trfExceptions.TransformException, reportException:
+            # This is a bad one!
+            msg.critical('Attempt to write job report failed with exception {0!s}: {1!s}'.format(reportException.__class__.__name__, reportException))
+            msg.critical('Stack trace now follows:\n{0}'.format(traceback.format_exc()))
+            msg.critical('Job reports are likely to be missing or incomplete - sorry')
+            msg.critical('Please report this as a transforms bug!')
+            msg.critical('Before calling the report generator the transform status was: {0}; exit code {1}'.format(self._exitMsg, self._exitCode))
+            msg.critical('Now exiting with a transform internal error code')
+            if ('orphanKiller' in self._argdict):
+                infanticide(message=True, listOrphans=True)
+            else:
+                infanticide(message=True)
+            sys.exit(trfExit.nameToCode('TRF_INTERNAL'))
+
+
+    # Description stolen from old trfs...
+    ## @brief Common signal handler.
+    #  @details This function is installed in place of the default signal handler and attempts to terminate the
+    #  transform gracefully. When a signal is caught by the transform, the stdout from the running application process
+    #  (i.e. @c athena.py) is allowed to continue uninterrupted and write its stdout to the log file (to retrieve
+    #  the traceback) before the associated job report records the fact that a signal has been caught and completes
+    #  the report accordingly.
+    #  @param signum Signal number. Not used since this is a common handle assigned to predefined signals using the
+    #  @c _installSignalHandlers(). This param is still required to satisfy the requirements of @c signal.signal().
+    #  @param frame Not used. Provided here to satisfy the requirements of @c signal.signal().
+    #  @return Does not return. Raises SystemExit exception.
+    #  @exception SystemExit()
+    def _exitWithReport(self, signum, frame):
+        msg.critical('Transform received signal {0}'.format(signum))
+        msg.critical('Stack trace now follows:\n{0!s}'.format(''.join(traceback.format_stack(frame))))
+        self._exitCode = 128+signum
+        self._exitMsg = 'Transform received signal {0}'.format(signum)
+
+        # Reset signal handlers now - we don't want to recurse if the same signal arrives again (e.g. multiple ^C)
+        resetTrfSignalHandlers()
+
+        msg.critical('Attempting to write reports with known information...')
+        self.generateReport(fast=True)
+        if ('orphanKiller' in self._argdict):
+            infanticide(message=True, listOrphans=True)
+        else:
+            infanticide(message=True)
+
+        sys.exit(self._exitCode)
+
+    ## @brief Setter for transform's validation dictionary
+    #  @details This function updates the validation dictionary for the transform,
+    #  updating values which are passed in the \c newValidationOptions argument.
+    #  @param newValidationOptions Dictionary (or tuples) to update validation
+    #  dictionary with
+    #  @return None
+    def updateValidationDict(self, newValidationOptions):
+        self.validation.update(newValidationOptions)
+
+    ## @brief Getter function for transform validation dictionary
+    #  @return Validation dictionary
+    def getValidationDict(self):
+        return self.validation
+
+    ## @brief Getter for a specific validation option
+    #  @param key Validation dictionary key
+    #  @return Validation key value or @c None if this key is absent
+    def getValidationOption(self, key):
+        if key in self.validation:
+            return self.validation[key]
+        else:
+            return None
+
+    ## @brief Do transform validation
+    def doTrfValidation(self):
+        self._validation.runValidation()
+
+    ## @brief Return a list of fileArgs used by the transform
+    #  @param \c io Filter files by io attribute
+    #  @return List of argFile instances
+    def getFiles(self, io = None):
+        res = []
+        msg.debug('Looking for file arguments matching: io={0}'.format(io))
+        for argName, arg in self._argdict.iteritems():
+            if isinstance(arg, argFile):
+                msg.debug('Argument {0} is argFile type ({1!s})'.format(argName, arg))
+                if io is not None and arg.io != io:
+                    continue
+                msg.debug('Argument {0} matches criteria'.format(argName))
+                res.append(arg)
+        return res
+
+
+    def validateInFiles(self):
+        if (('skipFileValidation' in self._argdict and self._argdict['skipFileValidation'] is True) or
+            ('skipInputFileValidation' in self._argdict and self._argdict['skipInputFileValidation'] is True)):
+            msg.info('Standard input file validation turned off for transform %s.' % self.name)
+        else:
+            msg.info('Validating input files')
+            if 'parallelFileValidation' in self._argdict:
+                trfValidation.performStandardFileValidation(dict=self._dataDictionary, io='input', parallelMode=self._argdict['parallelFileValidation'].value )
+            else:
+                trfValidation.performStandardFileValidation(dict=self._dataDictionary, io='input')
+
+    def validateOutFiles(self):
+        if (('skipFileValidation' in self._argdict and self._argdict['skipFileValidation'] is True) or
+            ('skipOutputFileValidation' in self._argdict and self._argdict['skipOutputFileValidation'] is True)):
+            msg.info('Standard output file validation turned off for transform %s.' % self.name)
+        else:
+            msg.info('Validating output files')
+            if 'parallelFileValidation' in self._argdict:
+                trfValidation.performStandardFileValidation(dict=self._dataDictionary, io='output', parallelMode=self._argdict['parallelFileValidation'].value )
+            else:
+                trfValidation.performStandardFileValidation(dict=self._dataDictionary, io='output')
\ No newline at end of file
diff --git a/Tools/PyJobTransforms/python/trfAMI.py b/Tools/PyJobTransforms/python/trfAMI.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5d8720832da4e0d5c335d5edf50cf2c4f7525c9
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfAMI.py
@@ -0,0 +1,352 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfAMI
+#  @brief Utilities for configuration of transforms via AMI tags
+#  @author atlas-comp-transforms-dev@cern.ch
+#  @version $Id$
+
+
+
+import ast
+import os
+import traceback
+
+import logging
+msg = logging.getLogger(__name__)
+
+from PyJobTransforms.trfExceptions import TransformAMIException
+from PyJobTransforms.trfDefaultFiles import getInputFileName, getOutputFileName
+
+from PyJobTransforms.trfExitCodes import trfExit
+errCode=trfExit.nameToCode('TRF_AMI_ERROR')
+
+
+## @brief Stores the configuration of a transform
+class TrfConfig:
+    def __init__(self):
+        self.name=None
+        self.release=None
+        self.physics={}
+        self.inFiles={}
+        self.outFiles={}
+        self.inDS=None
+        self.outfmts=[]
+
+    def __iter__(self):
+        theDict=self.inFiles.copy()
+        theDict.update(self.outFiles)
+        theDict.update(self.physics)
+        for (k,v) in theDict.iteritems():
+            yield k,v
+
+    def __str__(self):
+        string = 'asetup '+self.release+'\n'+self.name
+        string += self._str_to_dict(self.physics) +'\n'
+
+        string +='\nInput file arguments:\n'
+        if self.inFiles:
+            string += self._str_to_dict(self.inFiles) +'\n'
+        if self.inDS:
+            string +='\nExample input dataset: '+ self.inDS + '\n'
+
+        string +='\nOutput file arguments:\n'
+        if self.outFiles:
+            string += self._str_to_dict(self.outFiles) + '\n'
+        if self.outfmts:
+            string += '\nPossible output data types: '+ str(self.outfmts) + '\n'
+        return string
+
+    def _str_to_dict(self,adict):
+        string=''
+        for (k,v) in adict.iteritems():
+            string +=" "+k+"='"+v.replace("'", "\\'")+"'"
+        return string
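# A brief usage sketch for the tag configuration classes in this module; the
# tag name is an invented example (real tags are resolved via AMI or ProdSys):
#
#     tag = TagInfo('q431')
#     if tag.isProdSys:
#         print 'This is a ProdSys tag'
#     for trfConfig in tag.trfs:
#         print trfConfig            # asetup release plus argument dictionaries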
+## @brief Stores the information about a given tag.
+class TagInfo:
+    def __init__(self,tag):
+        self._tag=tag
+        self._isProdSys=None
+        self._trfs=None
+
+    @property
+    def isProdSys(self):
+        if self._isProdSys is None:
+            prodtags=getProdSysTagsCharacters()
+            if self._tag[0] in prodtags:
+                self._isProdSys=True
+            else:
+                self._isProdSys=False
+        return self._isProdSys
+
+
+    @property
+    def trfs(self):
+        if self._trfs is None:
+            if self.isProdSys:
+                self._trfs=getTrfConfigFromPANDA(self._tag)
+            else:
+                self._trfs=getTrfConfigFromAMI(self._tag)
+        return self._trfs
+
+
+    def __str__(self):
+        string = '\nInformation about tag '+self._tag+':\n'
+
+        if self.isProdSys:
+            string +='This is a ProdSys tag. Input and output file arguments are likely to be missing because they are often not part of the tag definition.\n'
+        else:
+            string +='This is a T0 tag.\n'
+
+        string +='This tag consists of ' + str(len(self.trfs)) + ' transform command(s).\n'
+        string += 'Transform commands follow below.\n'
+        string += 'Input and output file names (if present) are only suggestions.\n'
+
+        for trf in self.trfs:
+            string+='\n'+str(trf)+'\n'
+
+        return string
+
+    def dump(self, file):
+        pass  # not yet implemented
+
+
+## @brief Get AMI client
+#  @param useReplica If @c True CERN replica is used instead of primary AMI.
+#  @returns pyAMI.client.AMIClient instance
+def getAMIClient(useReplica=False):
+    msg.debug('Getting AMI client...')
+
+    try:
+        from pyAMI.client import AMIClient
+        from pyAMI.auth import AMI_CONFIG
+        from pyAMI.exceptions import AMI_Error
+        from pyAMI import endpoint
+        from pyAMI.endpoint import get_endpoint, get_XSL_URL
+    except ImportError:
+        raise TransformAMIException(errCode, 'Import of pyAMI modules failed.')
+
+    if useReplica:
+        endpoint.TYPE = 'replica'
+    else:
+        endpoint.TYPE = 'main'
+    msg.debug('Using endpoint %s ' % get_endpoint())
+    msg.debug('Using xsl_url %s ' % get_XSL_URL())
+
+    amiclient = AMIClient()
+    return amiclient
+
+## @brief Get list of characters of ProdSys tags
+#  @returns list of characters
+def getProdSysTagsCharacters():
+
+    msg.debug('Getting list of ProdSys tag characters...')
+
+    defaultList=['y', 'p', 'e', 's', 'd', 'r', 't', 'a', 'b', 'w']
+
+    argv=["SearchQuery"]
+    argv.append("-sql=select productionStep.productionStepTag FROM productionStep WHERE ( ( productionStep.writeStatus LIKE 'valid%') AND productionStep.actor = 'TR')")
+    argv.append("project=Atlas_Production")
+    argv.append("processingStep=Atlas_Production")
+
+    try:
+        from pyAMI.exceptions import AMI_Error
+    except ImportError:
+        msg.warning('Import of pyAMI modules failed (is your release setup correctly?).')
+        msg.warning('Returning default list of ProdSys tags.')
+        return defaultList
+
+    try:
+        amiclient=getAMIClient(False)
+        result=amiclient.execute(argv)
+    except (AMI_Error, TransformAMIException):
+        msg.debug('An exception occurred: %s' % traceback.format_exc())
+        msg.warning('Getting ProdSysTags from primary AMI failed. Trying CERN replica.')
+
+        try:
+            amiclient=getAMIClient(True)
+            result=amiclient.execute(argv)
+        except (AMI_Error, TransformAMIException):
+            msg.debug('An exception occurred: %s' % traceback.format_exc())
+            msg.warning('Getting ProdSysTags from CERN replica failed (do you have the necessary credentials to access AMI?).')
+            msg.warning('Returning default list of ProdSysTags.')
+            return defaultList
+
+    return [ row['productionStepTag'] for row in result.rows() ]
+
+
+## @brief Get PANDA client
+#  @returns cx_Oracle cursor instance
+def getPANDAClient():
+    msg.debug('Getting PANDA client...')
+    try:
+        import cx_Oracle
+    except ImportError:
+        raise TransformAMIException(errCode, 'Import of cx_Oracle failed (is Oracle setup on this machine?).')
+
+    try:
+        cur = cx_Oracle.connect('atlas_grisli_r/panda_c10@adcr_panda').cursor()
+    except:
+        msg.debug('An exception occurred while connecting to PANDA database: %s' % traceback.format_exc())
+        raise TransformAMIException(errCode, 'Failed to get PANDA client connection (N.B. this does not work from outside CERN).')
+
+    return cur
+
+## @brief Un-escape information from PANDA
+#  @details Provided by Pavel.
+def ReadablePANDA(s):
+    return s.replace('%0B',' ').replace('%9B','; ').replace('%8B','"').replace('%3B',';').replace('%2C',',').replace('%2B','+')
+
+
+## @brief Get information about a ProdSys tag from PANDA
+#  @param tag Tag for which information is requested
+#  @returns list of PyJobTransforms.trfAMI.TrfConfig instances
+def getTrfConfigFromPANDA(tag):
+
+    msg.debug('Using PANDA to get info about tag %s' % tag)
+
+    try:
+        pandaclient=getPANDAClient()
+        pandaclient.execute("select trf,trfv,lparams,vparams,formats,cache from t_trf_config where tag='%s' and cid=%d" %(tag[:1],int(tag[1:]) ) )
+        result=pandaclient.fetchone()
+    except:
+        msg.info('An exception occurred: %s' % traceback.format_exc())
+        raise TransformAMIException(errCode, 'Getting tag info from PANDA failed.')
+
+    if result is None:
+        raise TransformAMIException(errCode, 'Tag %s not found in PANDA database' % tag)
+
+    msg.debug('Raw data returned from panda DB is:' + os.linesep + str(result))
+
+    trfn=result[0].split(',')
+    msg.debug('List of transforms: %s' % trfn)
+    trfv=result[1].split(',')
+    msg.debug('List of releases: %s' % trfv)
+    lparams=result[2].split(';')
+    msg.debug('List of arguments: %s' % lparams)
+    vparams=result[3].split(';')
+    msg.debug('List of argument values: %s' % vparams)
+    formats=result[4].split('.')
+    msg.debug('List of formats: %s' % formats)
+    cache=result[5].split(',')
+    msg.debug('List of caches: %s' % cache)
+
+
+    if not ( len(trfn) == len(trfv) == len(lparams) == len(vparams) ):
+        raise TransformAMIException(errCode, 'Inconsistency in number of trfs.')
+
+    # Cache can be a single value, in which case it needs replicated for other
+    # transform steps, or it can be multivalued - great schema design guys :-(
+    if len(cache) != len(trfv):
+        if len(cache) == 1:
+            cache = cache * len(trfv)
+        else:
+            raise TransformAMIException(errCode, 'Inconsistency in number of cache entries vs. release numbers ({0}; {1}).'.format(cache, trfv))
+
+    listOfTrfs=[]
+
+    for iTrf in range(len(trfn)):
+
+        trf = TrfConfig()
+        trf.name =trfn[iTrf]
+        trf.release=trfv[iTrf] + "," + cache[iTrf]
+
+        keys=lparams[iTrf].split(',')
+        values=vparams[iTrf].split(',')
+
+        if ( len(keys) != len(values) ):
+            raise TransformAMIException(errCode, 'Inconsistency in number of arguments.')
+
+        physics = dict( (k, ReadablePANDA(v) ) for (k,v) in zip(keys, values))
+        # Hack to correct trigger keys being stored with spaces in panda
+        for k, v in physics.iteritems():
+            if 'triggerConfig' in k or 'triggerConfigByRun' in k:
+                if ' ' in v:
+                    physics[k] = v.replace(' ', ',')
+                    msg.warning('Attempted to correct illegal trigger configuration string: {0} -> {1}'.format(v, physics[k]))
+
+        msg.debug("Checking for pseudo-argument internal to ProdSys...")
+        if 'extraParameter' in physics:
+            val=physics.pop('extraParameter')
+            msg.debug("Removed extraParameter=%s from arguments." % val)
+
+        msg.debug("Checking for input/output file arguments...")
+        for arg in physics.keys():
+            if arg.lstrip('-').startswith('input') and arg.endswith('File'):
+                value=physics.pop(arg)
+                msg.debug("Found input file argument %s=%s." % (arg,value) )
+                fmt=arg.lstrip('-').replace('input','').replace('File','')
+                trf.inFiles[arg]=getInputFileName(arg)
+            elif arg.lstrip('-').startswith('output') and arg.endswith('File'):
+                value=physics.pop(arg)
+                msg.debug("Found output file argument %s=%s." % (arg,value) )
+                fmt=arg.lstrip('-').replace('output','').replace('File','')
+                trf.outFiles[arg]=getOutputFileName(fmt)
+
+        msg.debug("Checking for not set arguments...")
+        for arg,value in physics.items():
+            if value=="NONE" or value=="none":
+                val=physics.pop(arg)
+                msg.debug("Removed %s=%s from arguments." % (arg, val) )
+
+        trf.physics=physics
+
+        listOfTrfs.append(trf)
+
+    listOfTrfs[0].inDS=None  # not yet implemented
+    listOfTrfs[-1].outfmts=formats
+
+    return listOfTrfs
+
+
+## @brief Get information about a T0 tag from AMI
+#  @param tag Tag for which information is requested
+#  @returns list of PyJobTransforms.trfAMI.TrfConfig instances
+def getTrfConfigFromAMI(tag):
+
+    msg.debug('Using AMI to get info about tag %s' % tag)
+
+    try:
+        from pyAMI.exceptions import AMI_Error
+        from pyAMI.query import get_configtags
+    except ImportError:
+        raise TransformAMIException(errCode, 'Import of pyAMI modules failed.')
+
+    try:
+        amiclient=getAMIClient(False)
+        result=get_configtags(amiclient, tag)
+    except (AMI_Error, TransformAMIException) as e:
+
+        if 'Invalid configTag found' in e.args[0]:
+            raise TransformAMIException(errCode, 'Tag %s not found in AMI database.' % tag)
+
+        msg.debug('An exception occurred: %s' % traceback.format_exc())
+        msg.warning('Getting tag info from primary AMI failed. Trying CERN replica.')
+
+        try:
+            amiclient=getAMIClient(True)
+            result=get_configtags(amiclient, tag)
+        except (AMI_Error, TransformAMIException):
+            msg.debug('An exception occurred: %s' % traceback.format_exc())
+            raise TransformAMIException(errCode, 'Getting tag info from AMI failed.')
+
+    msg.debug('Raw result from AMI is: %s ' % result)
+
+    if ( result[0]!={'amiTag': tag } and result[0]!={'configTag': tag }):
+        msg.warning('Got unexpected result from AMI: %s when asking for tag %s' % (result[0],tag))
+        raise TransformAMIException(errCode, 'Getting tag info from AMI failed.')
+
+    trf = TrfConfig()
+    trf.name=result[1]['transformation']
+    trf.release=result[1]['SWReleaseCache'].replace('AtlasProduction-','')
+    trf.physics=dict( (k, str(v)) for (k,v) in ast.literal_eval(result[1]['phconfig']).iteritems() )
+    trf.inFiles=dict( (k, getInputFileName(k)) for k in ast.literal_eval(result[1]['inputs']).iterkeys() )
+    outputs=ast.literal_eval(result[1]['outputs'])
+    trf.outFiles=dict( (k, getOutputFileName(outputs[k]['dstype']) ) for k in outputs.iterkeys() )
+    trf.outfmts=[ outputs[k]['dstype'] for k in outputs.iterkeys() ]
+
+    return [ trf ]
+
+
+
+
diff --git a/Tools/PyJobTransforms/python/trfArgClasses.py b/Tools/PyJobTransforms/python/trfArgClasses.py
new file mode 100644
index 0000000000000000000000000000000000000000..b16dbcf9bbaed5b6fde9b533b90196e175e3b928
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfArgClasses.py
@@ -0,0 +1,2196 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfArgClasses
+#  @brief Transform argument class definitions
+#  @author atlas-comp-transforms-dev@cern.ch
+#  @version $Id: trfArgClasses.py 609252 2014-07-29 16:20:33Z wbreaden $
+
+
+import argparse
+import bz2
+import copy
+import glob
+import io
+import os
+import re
+import subprocess
+import sys
+import traceback
+import types
+import uuid
+
+from fnmatch import fnmatch
+
+import logging
+msg = logging.getLogger(__name__)
+
+import PyJobTransforms.trfExceptions as trfExceptions
+
+from PyJobTransforms.trfFileUtils import athFileInterestingKeys, AthenaFileInfo, NTUPEntries, HISTEntries, urlType, ROOTGetSize
+from PyJobTransforms.trfUtils import call, cmpMetadata, cliToKey
+from PyJobTransforms.trfExitCodes import trfExit as trfExit
+from PyJobTransforms.trfDecorators import timelimited
+
+
+## @class argFactory
+#  @brief Factory class used to generate argument class instances for argparse
+class argFactory(object):
+    def __init__(self, genclass, *args, **kwargs):
+        msg.debug('Initialised class %s with args=%s; kwargs=%s' % (genclass, args, kwargs))
+        self._genclass = genclass
+        self._args = args
+        self._kwargs = kwargs
+
+    def __call__(self, valueString=None):
+        msg.debug('Called class %s with value=%s; args=%s; kwargs=%s' % (self._genclass, valueString, self._args, self._kwargs))
+
+        # Wrap this step in our own try/except because if this goes wrong we want to see the exception
+        # instead of having it masked by the argparse module
+        try:
+            # Passing None suppresses the value passed to the constructor, thus the constructor's own
+            # default value is used - generally this will match the default value for the underlying
+            # python object
+            if valueString is None:
+                obj = self._genclass(*self._args, **self._kwargs)
+            else:
+                obj = self._genclass(valueString, *self._args, **self._kwargs)
+        except Exception, e:
+            msg.fatal('Got this exception raised when calling object factory: {0}'.format(e))
+            raise
+        return obj
+
+    def __str__(self):
return 'argFactory for {0}, args {1}, kwargs {2}'.format(self._genclass, self._args, self._kwargs) + + +## @class argument +# @brief Basic argument class holding a value which can be get and set +# @note Any argument type is supported +class argument(object): + + ## @brief Initialise argument class + # @param value Initial value to be set + # @param runarg Boolean specifiying if this is a run time argument + # (specifically passed to athena by the AthenaExecutor, other executors may do the same). + # @param name Name for this argument + def __init__(self, value = None, runarg = True, name = None): + self._runarg = runarg + self._name = name + self._desc = {} + + ## @note We have a default of None here, but all derived classes should + # definitely have their own value setter and translate this value to something + # sensible for their underlying value type. + # N.B. As most argument classes use this default constructor it @b must + # call the @c @value.setter function! + self.value = value + + ## @brief Return argument value + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Set argument value + # @note No conversion or coercion done + @value.setter + def value(self, value): + self._value = value + + ## @brief Return runarg status + @property + def isRunarg(self): + return self._runarg + + ## @brief Name getter + @property + def name(self): + return self._name + + ## @brief Name setter + @name.setter + def name(self, value): + self._name = value + + @property + def prodsysDescription(self): + return {} + + ## @brief String conversion of our value + def __str__(self): + return '{0}: Value {1} (isRunArg={2})'.format(self.__class__.__name__, self._value, self._runarg) + + ## @brief Repr conversion of our value + def __repr__(self): + return repr(self.value) + + ## Comparison is based on value attribute + def __cmp__(self, other): + if self.value == other.value: + return 0 + if self.value > other.value: + return 1 + return -1 + + +## @brief String type argument +class argString(argument): + + ## @brief Class initialisation + # @param value Initial value to be set + # @param runarg Boolean specifiying if this is a run time argument + # (specifically passed to athena by the AthenaExecutor, other executors may do the same). + # @param name Name for this argument + # @param choices Optional list of strings from which value must be set + def __init__(self, value = None, runarg = True, name = None, choices = None): + self._choices = choices + super(argString, self).__init__(value = value, runarg = runarg, name=name) + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Argument value setter + # @details Sets value directly if it's a @c str, otherwise call the @c str() converter + @value.setter + def value(self, value): + if value == None: + # For strings, None maps to '' + self._value = '' + else: + # Call string converter - should work for everything... 
+ self._value = str(value) + if self._choices: + if self._value not in self._choices: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CHOICES_FAIL'), 'Converted argument {0} for {1} did not match any valid choice: {2}'.format(value, self._name, self._choices)) + + ## @brief Choices getter + # @returns Valid choices list + def choices(self): + return self._choices + + # prodsysDescription: human readable from of type plus possible values + @property + def prodsysDescription(self): + self._desc = {'type' : 'string'} + if self._choices: + self._desc['choices'] = self._choices + return self._desc + + ## @note String value can be printed directly + def __str__(self): + return self.value + + +## @brief Int type argument +class argInt(argument): + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Argument value setter + # @details Sets value directly if it's an @c int, otherwise call the @c int() converter. + # @throws trfExceptions.TransformArgException if @c int() conversion fails + @value.setter + def value(self, value): + if value == None: + # For ints None maps to 0 + self._value = 0 + else: + if isinstance(value, int): + self._value = value + else: + ## We try hard to convert the value we were given - anything @c int() swallows we accept + try: + self._value = int(value) + except ValueError, e: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert value {0} to int'.format(value)) + + # prodsysDescription: human readable from of type plus possible values + @property + def prodsysDescription(self): + self._desc = {'type' : 'INT'} + return self._desc + + + +## @brief Float type argument +class argFloat(argument): + + ## @brief Float argument constructor + # @param value Initial value to set, default None, + # @param min Minimum value for any limits that need to be set + # @param max Maximum value for any limits that need to be set + def __init__(self, value=None, min=None, max=None, runarg=True, name=None): + self._min = min + self._max = max + self._desc = {} + super(argFloat, self).__init__(value = value, runarg = runarg, name=name) + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + @property + def prodsysDescription(self): + self._desc = {'type' : 'float'} + if self._min: + self._desc['min'] = self._min + if self._max: + self._desc['max'] = self._max + return self._desc + + ## @brief Setter function. + # @details The argument can be anything, if it is not of type @c float, it will attempt to convert using @c float(). + # @param value Value to set. + # @throw trfExceptions.TransformArgException Exception thrown if the float conversion fails. + # @throw trfExceptions.TransformArgException Exception thrown if value is outside of the given range. 
+ @value.setter + def value(self, value=None): + # Default value will be 0.0 or self._min (if defined) + if value == None: + if self._min is not None: + self._value = self._min + else: + self._value = 0.0 + else: + try: + if isinstance(value, float): + self._value = value + else: + self._value = float(value) + except ValueError: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Failed to convert %s to a float' % str(value)) + + if (self._min != None and self.value < self._min) or (self._max != None and self._value > self._max): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_OUT_OF_RANGE'), + 'argFloat value out of range: %g is not between %s and %s' % + (self.value, self._min, self._max)) + + +## @brief Boolean type argument +class argBool(argument): + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Value setter function. + # @details The argument can be a @c bool or a string with value @c true or @c false (case independent). + # @param value Value to set. + # @throw trfExceptions.TransformArgException Exception thrown if the float conversion fails. + # @throw trfExceptions.TransformArgException Exception thrown if value is outside of the given range. + @value.setter + def value(self, value): + # Default value matches the python bool() constructor + if value == None: + self._value = False + else: + if isinstance(value, bool): + self._value = value + else: + self._value = strToBool(value) + + # prodsysDescription: human readable from of type plus possible values + @property + def prodsysDescription(self): + self._desc = {'type' : 'bool'} + return self._desc + +## @brief List of string arguments +class argList(argument): + + ## @brief List of string arguments + # @details Argument holding a list of string values + # @param splitter The splitter argument determines how the string is split (default by a comma) + # @param value Initial value to set (note default is an empty list []) + # @param supressEmptyStrings If empty strings are removed from the list + # (Can be used to easily set an empty list from the CLI) + def __init__(self, value = [], supressEmptyStrings = True, splitter=',', runarg=True, name=None): + self._splitter = splitter + self._supressEmptyStrings = supressEmptyStrings + + super(argList, self).__init__(value = value, runarg = runarg, name=name) + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + # prodsysDescription: human readable from of type plus possible values + @property + def prodsysDescription(self): + self._desc = {'type' : 'list'} + if self._supressEmptyStrings: + self._desc['supress Empty Strings'] = self._supressEmptyStrings + return self._desc + + + ## @brief Argument value setter + # @detail If we get a list then set that, otherwise we split a string on the splitter character + @value.setter + def value(self, value): + if isinstance(value, (list, tuple)): + self._value = list(value) + elif value==None: + self._value = [] + return + else: + try: + if self._supressEmptyStrings: + self._value = [ v for v in value.split(self._splitter) if v is not '' ] + else: + self._value = value.split(self._splitter) + except AttributeError: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Failed to convert %s to a list' % str(value)) + + ## @brief Append a value to the list + # @param addme Item to add + def append(self, addme): + 
self._value.append(addme) + + ## @brief String conversion + # @details Flatten values and space separate + # @warning Values set with spaces will be confused by this conversion + def __str__(self): + return " ".join(self._value) + + ## @brief Repr conversion + # @details Return a python parsable string + def __repr__(self): + return '[' + ','.join([ repr(s) for s in self._value ]) + ']' + + +## @brief List of int arguments +class argIntList(argList): + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + + ## @brief Argument value setter + # @details If we get a list then set that, otherwise we split a string on the splitter character + # @throw trfExceptions.TransformArgException Exception thrown if any list member is not an @c int + # @throw trfExceptions.TransformArgException Exception thrown if any @c int() conversion fails + @value.setter + def value(self, value): + if isinstance(value, list): + for v in value: + if not isinstance(v, (int, long)): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), + 'Illegal argument %s in list of ints' % str(v)) + self._value = value + elif value==None: + self._value = [] + return + else: + try: + if self._supressEmptyStrings: + self._value = [ v for v in value.split(self._splitter) if v != '' ] + else: + self._value = value.split(self._splitter) + self._value = [ int(el) for el in self._value ] + except (AttributeError, ValueError): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Failed to convert %s to a list of ints' % str(value)) + + def __str__(self): + return " ".join([ str(el) for el in self._value ]) + + @property + def prodsysDescription(self): + self._desc = {'type' : 'list', 'subtype' : 'INT'} + return self._desc + + +# Special list which stores k:v pairs, where the value is a float (used for AthenaMP merge target size) +class argKeyFloatValueList(argList): + ## @brief Dictionary of key value arguments, where the values are floats + # @param splitter The splitter argument determines how the string is split (default by a comma) + # @param kvsplitter The kvsplitter argument determines how the key and value are split (default by a colon) + # @param value Initial value to set (note default is an empty dictionary {}) + # @param supressEmptyStrings If empty strings are removed from the list + # (Can be used to easily set an empty list from the CLI) + def __init__(self, value = {}, supressEmptyStrings = True, splitter=',', kvsplitter=":", runarg=True, name=None): + self._splitter = splitter + self._kvsplitter = kvsplitter + self._supressEmptyStrings = supressEmptyStrings + + super(argList, self).__init__(value = value, runarg = runarg, name=name) + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Argument value setter + # @details If we get a dict then set that, otherwise we split a string on the splitter character + # and then on the kvsplitter into key and value, with the value being converted to float + # @throw trfExceptions.TransformArgException Exception thrown if any dictionary @c key:value member is not @c string:float + # @throw trfExceptions.TransformArgException Exception thrown if any @c float() conversion fails or the kvsplitter is not found + @value.setter + def value(self, value): + if isinstance(value, dict): + for k, v in value.iteritems():
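+ # Illustrative sketch, not from the original code (hypothetical value): this + # setter accepts either a ready-made dict or a CLI string such as + # 'AOD:2000000000,ESD:500000000', which is split on ',' and then on ':' to + # give {'AOD': 2000000000.0, 'ESD': 500000000.0}. + if not isinstance(k, str): + raise 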
trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), + 'Illegal key argument type {0} in dictionary for argKeyFloatValueList'.format(k)) + if not isinstance(v, float): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_ERROR'), + 'Illegal value argument type {0} in dictionary for argKeyFloatValueList'.format(v)) + self._value = value + elif value==None: + self._value = {} + return + else: + self._value = {} + try: + if self._supressEmptyStrings: + kvlist = [ v for v in value.split(self._splitter) if v is not '' ] + else: + kvlist = value.split(self._splitter) + for item in kvlist: + k, v = item.split(self._kvsplitter, 1) + self._value[k] = float(v) + except (AttributeError, ValueError): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Failed to convert {0} to a dictionary of string:int values'.format(value)) + + def __str__(self): + return str(self._value) + + @property + def prodsysDescription(self): + self._desc = {'type' : 'list', 'subtype' : 'key:floatValue'} + return self._desc + + +## @brief File argument class +# @details Inherits from argList +class argFile(argList): + + ## @brief Initialise an argFile + # @param io @c input, @c output or @c temporary file, default @c output. + # @param splitter changes character a string list is split on, default is a comma (see argList). + # @param type Datatype in this instance - this should be the @b major datatype (ESD, AOD, etc). + # @param subtype The data subtype, which should match the DATATYPE portion of the corresponding + # argument name, e.g., outputDESD_SGLMUFile -> DESD_SGLMU + # @param guid This is a non-standard option and allows the GUID for files without an intrinsic GUID + # to be set explicitly at initialisation. The parameter should be a dictionary, keyed by filename, + # which contains the GUID string, e.g., <tt>{'file1' : '930de3de-de8d-4819-9129-beef3bb4fadb', 'file2' : ... }</tt> + # @param multipleOK Explicit declaration of whether multiple arguments are allowed; default is @c True for @c input, + # @c False for @c output and @c temporary + # @param name The corresponding key for this argument in the argdict of the transform (e.g., @c inputESDFile) + # @param executor List of execution substeps where this file type should be added explicitly (e.g., minbias HITS + # for digitisation) + # @param mergeTargetSize Target merge size if this instance supports a selfMerge method. Value is in bytes, with the + # special values: @c -1 Always merge to a single file, @c 0 never merge these files + # @note When used in argument parser, set nargs='+' to get auto-concatenation of multiple arguments (should be used + # when @c multipleOK is @c True) + def __init__(self, value=list(), type=None, subtype=None, io = 'output', splitter=',', runarg = True, guid = None, + multipleOK = None, name=None, executor=list(), mergeTargetSize=-1): + # Set these values before invoking super().__init__ to make sure they can be + # accessed in our setter + self._dataset = None + self._urlType = None + self._type = type + self._subtype = subtype + self._guid = guid + self._mergeTargetSize = mergeTargetSize + + # User setter to get valid value check + self.io = io + + self._exe = executor + + ## @note Variable listing set of file metadata which corresponds to this class, + # Key is the metadata variable name, the value is the function to call to + # populate/refresh this metadata value. Function must take a single parameter, + # which is the list of files to get metadata for. 
It must return a metadata + # dictionary: {file1 : {key1: value1, key2: value2}, file2: ...} + # Keys which start with _ are for transform internal use and should not appear in + # jobReports + + self._metadataKeys = {'file_size': self._getSize, + 'integrity': self._getIntegrity, + 'file_guid': self._generateGUID, + '_exists': self._exists, + } + + if multipleOK is None: + if self._io == 'input': + self._multipleOK = True + else: + self._multipleOK = False + else: + self._multipleOK = multipleOK + + ## @note N.B. argList.__init__ calls _our_ setter for us + super(argFile, self).__init__(value=value, splitter=splitter, runarg=runarg, name=name) + + + ## @brief Argument value getter + # @returns Current value + @property + def value(self): + return self._value + + ## @brief Argument value setter + # @details Calls the valueSetter function with the standard options + @value.setter + def value(self, value): + self.valueSetter(value) + + ## @brief multipleOK getter + # @returns Current value + @property + def multipleOK(self): + return self._multipleOK + + ## @brief multipleOK value setter + @multipleOK.setter + def multipleOK(self, value): + self._multipleOK = value + + ## @brief mergeTargetSize value getter + @property + def mergeTargetSize(self): + return self._mergeTargetSize + + ## @brief mergeTargetSize value setter + @mergeTargetSize.setter + def mergeTargetSize(self, value): + if value==None: + self._mergeTargetSize = 0 + else: + self._mergeTargetSize = value + + @property + def prodsysDescription(self): + if type(self._type) is types.DictType: + if self._type=={}: + self._desc = {'type' : 'file', 'subtype' : "NONE" } + else: + self._desc = {'type' : 'file', 'subtype' : dict((str(k).upper(), str(v).upper()) for (k,v) in self._type.iteritems())} + else: + self._desc = {'type' : 'file', 'subtype' : str(self._type).upper()} + self._desc['multiple'] = self._multipleOK + return self._desc + + ## @brief Executor status getter + @property + def executor(self): + return self._exe + + ## @brief Set the argFile value, but allow parameters here + # @note Normally athena only takes a single value for an output file, but when AthenaMP runs + # it can produce multiple output files - this is allowed by setting <tt>allowMultiOutputs = False</tt> + # @note The setter protects against the same file being added multiple times + def valueSetter(self, value): + ## @note Impossible to use the argList.value setter here? super() doesn't seem to get it right: + # <tt>super(argFile, self).value = value</tt> results in an attribute error + + prodSysPattern = re.compile(r'(?P<prefix>.*)\[(?P<expand>[\d\.,_]+)\](?P<suffix>.*)') + + ## @note First do parsing of string vs. lists to get list of files + if isinstance(value, (list, tuple)): + self._value = list(value) + elif value==None: + self._value = [] + return + else: + try: + # If there is a prodsys glob in the game we turn off splitting + prodsysGlob = prodSysPattern.match(value) + if prodsysGlob and self._splitter == ',': + msg.debug('Detected prodsys glob - normal splitting is disabled') + self._value = [value] + else: + self._value = value.split(self._splitter) + except (AttributeError, TypeError): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Failed to convert %s to a list' % str(value))
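+ # Illustrative sketch, not from the original code (hypothetical name): the + # prodsys pattern matches 'myfile._[001,002,003].pool.root' with + # prefix='myfile._', expand='001,002,003' and suffix='.pool.root', and such + # a value is kept whole rather than being split on the commas. + + ## @note Check for duplicates (N.B. 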
preserve the order, just remove the duplicates) + deDuplicatedValue = [] + for file in self._value: + if file not in deDuplicatedValue: + deDuplicatedValue.append(file) + else: + msg.warning("Removing duplicated file {0} from file list".format(file)) + if len(self._value) != len(deDuplicatedValue): + self._value = deDuplicatedValue + msg.warning('File list after duplicate removal: {0}'.format(self._value)) + + ## @note Now look for dataset notation + # TODO - handle reset of filenames from AthenaMP without trashing DS name + self._getDatasetFromFilename(reset = True) + + # Find our URL type (if we actually have files!) + # At the moment this is assumed to be the same for all files in this instance + # although in principle one could mix different access methods in the one input file type + if len(self._value) > 0: + self._urlType = urlType(self._value[0]) + else: + self._urlType = None + + ## @brief Input file globbing and expansion + if self._io == 'input': + ## @note TODO: Non-posix URLs + # Problem is not so much the [] expansion, but the invisible .N attempt number + # One can only deal with this with a listdir() functionality + # N.B. Current transforms only do globbing on posix fs too (see trfutil.expandStringToList()) + if self._urlType is 'posix': + msg.debug('Found POSIX filesystem input - activating globbing') + newValue = [] + for filename in self._value: + ## @note Weird prodsys style globbing... + # This has the format: + # @c prefix._[NNN,MMM,OOO,PPP].suffix (@c NNN, etc. are numbers) + # However an invisible .N attempt number also needs to be appended before doing real globbing + prodsysGlob = prodSysPattern.match(filename) + if prodsysGlob: + msg.debug('Detected [MMM,NNN,OOO] style prodsys globbing for {0}'.format(filename)) + msg.debug('Prefix: {0}; Numerical expansion: {1}; Suffix: {2}'.format(prodsysGlob.group('prefix'), prodsysGlob.group('expand'), prodsysGlob.group('suffix'))) + numbers = prodsysGlob.group('expand').split(',') + for number in numbers: + # Add a final '.*' to match against the .AttemptNumber invisible extension + globName = prodsysGlob.group('prefix') + str(number) + prodsysGlob.group('suffix') + '*' + msg.debug('Will try globbing against {0}'.format(globName)) + globbedNames = glob.glob(globName) + if len(globbedNames) > 1: + msg.warning('Warning - matched multiple filenames ({0}) when adding the .AttemptNumber to {1}'.format(globbedNames, globName)) + elif len(globbedNames) == 0: + msg.warning('Warning - matched NO filenames when adding the .AttemptNumber to {0}'.format(globName)) + newValue.extend(globbedNames) + else: + # Simple case + newValue.extend(glob.glob(filename)) + if len(self._value) > 0 and len(newValue) is 0: + # Woops - no files! + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'Input file argument(s) {0!s} globbed to NO input files - probably the file(s) are missing'.format(self._value)) + self._value = newValue + msg.debug ('File input is globbed to %s' % self._value) + + elif self._urlType is 'root': + msg.debug('Found root filesystem input - activating globbing') + newValue = [] + for filename in self._value: + + ## @note Weird prodsys style globbing... + # This has the format: + # @c prefix._[NNN,MMM,OOO,PPP].suffix (@c NNN, etc. 
are numbers) + # However an invisible .N attempt number also needs to be appended before doing real globbing + prodsysGlob = prodSysPattern.match(filename) + if prodsysGlob: + theNameList = [filename] + i = 0 + msg.debug('Try to split input string if more than one file is given') + if ',root:' in filename: + theNameList = filename.split(',root:') + for name in theNameList: + if not name.startswith('root:'): + name = 'root:'+name + theNameList[i] = name + i = i + 1 + + msg.debug('Split input string into files: {0}'.format(theNameList)) + for fileName in theNameList: + prodsysGlob = prodSysPattern.match(fileName) + msg.debug('Detected [MMM,NNN,OOO] style prodsys globbing for {0}'.format(fileName)) + msg.debug('Prefix: {0}; Numerical expansion: {1}; Suffix: {2}'.format(prodsysGlob.group('prefix'), prodsysGlob.group('expand'), prodsysGlob.group('suffix'))) + numbers = prodsysGlob.group('expand').split(',') + for number in numbers: + # Add a final '.*' to match against the .AttemptNumber invisible extension + globName = prodsysGlob.group('prefix') + str(number) + prodsysGlob.group('suffix') + msg.debug('Will try globbing against {0}'.format(globName)) + globbedNames =[globName]# glob.glob(globName) + if len(globbedNames) > 1: + msg.warning('Warning - matched multiple filenames ({0}) when adding the .AttemptNumber to {1}'.format(globbedNames, globName)) + elif len(globbedNames) == 0: + msg.warning('Warning - matched NO filenames when adding the .AttemptNumber to {0}'.format(globName)) + newValue.extend(globbedNames) + + else: + # Simple case + if not(str(filename).endswith('/')) and '*' not in filename and '?' not in filename: + msg.debug('Seems that only one file was given: {0}'.format(filename)) + newValue.extend(([filename])) + else: + #hopefully this recognised wildcards... + path = filename + fileMask = '' + if '*' in filename or '?' 
in filename: + msg.debug('Split input into path for listdir() and a filemask to select available files.') + path = filename[0:filename.rfind('/')+1] + msg.debug('path: {0}'.format(path)) + fileMask = filename[filename.rfind('/')+1:len(filename)] + msg.debug('Will select according to: {0}'.format(fileMask)) + + msg.debug('eos command is hard coded - check if it is executable') + cmd = ['/afs/cern.ch/project/eos/installation/atlas/bin/eos.select' ] + if not os.access ('/afs/cern.ch/project/eos/installation/atlas/bin/eos.select', os.X_OK ): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'Input file argument(s) {0!s} globbed to NO input files - the eos ls command {1} could not be executed.'.format(self._value, cmd)) + + cmd.extend(['ls']) + cmd.extend([path]) + + myFiles = [] + try: + proc = subprocess.Popen(args = cmd,bufsize = 1, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + while proc.poll() is None: + line = proc.stdout.readline() + if line: + if "root" in line: + myFiles += [str(path)+str(line.rstrip('\n'))] + + rc=proc.returncode + + + if not rc==0: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'Input file argument(s) {0!s} globbed to NO input files - the eos ls command failed'.format(self._value)) + msg.debug('Executed eos ls, found:') + patt = re.compile(fileMask.replace('*','.*').replace('?','.')) + for srmFile in myFiles: + if fileMask != '': + if(patt.search(srmFile)) is not None: + #if fnmatch.fnmatch(srmFile, fileMask): + msg.debug('match: %s', srmFile) + newValue.extend(([srmFile])) + + + + else: + newValue.extend(([srmFile])) + + msg.debug('Selected files: %s', newValue) + except (AttributeError, TypeError, OSError): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_RUNTIME_ERROR'), + 'Failed to convert %s to a list' % str(value)) + if len(self._value) > 0 and len(newValue) == 0: + # Woops - no files! + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'Input file argument(s) {0!s} globbed to NO input files - probably the file(s) are missing'.format(self._value)) + self._value = newValue + msg.debug ('File input is globbed to %s' % self._value) + # Check if multiple outputs are ok for this object + elif self._multipleOK == False and len(self._value) > 1: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), + 'Multiple file arguments are not supported for {0} (was given: {1})'.format(self, self._value)) + + # Reset the self._fileMetadata dictionary + self._resetMetadata() + + @property + def io(self): + return (self._io) + + @io.setter + def io(self, value): + if value not in ('input', 'output', 'temporary'): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_RUNTIME_ERROR'), + 'File arguments must be specified as input, output or temporary - got {0}'.format(value)) + self._io = value + + @property + def dataset(self): + return self._dataset + + @property + def type(self): + return self._type + + @type.setter + def type(self, value): + self._type = value + + @property + def subtype(self): + return self._subtype + + @subtype.setter + def subtype(self, value): + self._subtype = value + + ## @brief Name getter + @property + def name(self): + return self._name
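+ # Illustrative sketch, not from the original code (hypothetical name): assigning + # name = 'outputDESD_SGLMUFile' below autodetects subtype 'DESD_SGLMU' and type + # 'ESD', since the leading D of DRAW/DESD/DAOD is stripped for the major type. + + ## @brief Name setter + # @note This property setter will also set the type and subtype of the argFile + # if they are not yet set. This means that for most arguments the type and subtype + # are automatically set correctly. 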
+ @name.setter + def name(self, value): + self._name = value + m = re.match(r'(input|output|tmp.)([A-Za-z0-9_]+?)(File)?$', value) + if m: + msg.debug("ArgFile name setter matched this: {0}".format(m.groups())) + if self._type is None: + dtype = m.group(2).split('_', 1)[0] + # But DRAW/DESD/DAOD are really just RAW, ESD, AOD in format + if re.match(r'D(RAW|ESD|AOD)', dtype): + dtype = dtype[1:] + msg.debug("Autoset data type to {0}".format(dtype)) + self._type = dtype + if self._subtype is None: + msg.debug("Autoset data subtype to {0}".format(m.group(2))) + self._subtype = m.group(2) + else: + msg.debug("ArgFile name setter did not match against '{0}'".format(value)) + + + ## @brief Returns the whole kit and kaboodle... + # @note Populates the whole metadata dictionary for this instance + @property + def metadata(self): + self.getMetadata() + return self._fileMetadata + + ## @brief Return total number of events in all constituent files + @property + def nentries(self): + return self.getnentries() + + ## @brief Explicit getter, offering fast switch + def getnentries(self, fast=False): + totalEvents = 0 + for file in self._value: + events = self.getSingleMetadata(file, 'nentries', populate = not fast) + if events is None: + msg.debug('Got events=None for file {0} - returning None for this instance'.format(file)) + return None + if events is 'UNDEFINED': + msg.debug('Got events=UNDEFINED for file {0} - returning UNDEFINED for this instance'.format(file)) + return 'UNDEFINED' + if not isinstance(events, (int, long)): + msg.warning('Got unexpected events metadata for file {0}: {1!s} - returning None for this instance'.format(file, events)) + return None + totalEvents += events + + return totalEvents + + + ## @brief Resets all metadata files in this instance + # @details Metadata dictionary entry is reset for any files given (default all files) + # and any files that are no longer in this instance have any metadata removed + # (useful for self merging). 
+ # @note Metadata is set to @c {}, except for the case when an explicit GUID option was given + def _resetMetadata(self, files=[]): + if files == [] or '_fileMetadata' not in dir(self): + self._fileMetadata = {} + for file in self.value: + self._fileMetadata[file] = {} + else: + for file in files: + if file in self.value: + self._fileMetadata[file] = {} + elif file in self._fileMetadata: + del self._fileMetadata[file] + msg.debug('Metadata dictionary now {0}'.format(self._fileMetadata)) + + # If we have the special guid option, then manually try to set GUIDs we find + if self._guid is not None: + msg.debug('Now trying to set file GUID metadata using {0}'.format(self._guid)) + for file, guid in self._guid.iteritems(): + if file in self._value: + self._fileMetadata[file]['file_guid'] = guid + else: + msg.warning('Explicit GUID {0} was passed for file {1}, but this file is not a member of this instance'.format(guid, file)) + + ## @brief Return specific keys for specific files + # @param files List of files to return metadata for (default - all files in this instance) + # @param metadataKeys Keys to return (default - all keys valid for this class of files) + # @param maskMetadataKeys Keys to NOT return (useful when metadataKeys is left as default) + # @param populate If missing keys should be generated by calling the population subroutines + # @param flush If cached data should be flushed and the generators rerun + def getMetadata(self, files = None, metadataKeys = None, maskMetadataKeys = None, populate = True, flush = False): + # Normalise the files and keys parameter + if files is None: + files = self._value + elif isinstance(files, str): + files = (files,) + msg.debug('getMetadata will examine these files: {0!s}'.format(files)) + + if metadataKeys is None: + metadataKeys = self._metadataKeys.keys() + elif isinstance(metadataKeys, str): + metadataKeys = (metadataKeys,) + if maskMetadataKeys is not None: + for key in maskMetadataKeys: + if key in metadataKeys: + metadataKeys.remove(key) + msg.debug('getMetadata will retrieve these keys: {0!s}'.format(metadataKeys)) + + if flush is True: + msg.debug('Flushing cached metadata values') + self._resetMetadata() + + if populate is True: + msg.debug('Checking metadata values') + self._readMetadata(files, metadataKeys) + + metadata = {} + for file in files: + metadata[file] = {} + for mdkey in metadataKeys: + try: + metadata[file][mdkey] = self._fileMetadata[file][mdkey] + except KeyError: + # This should not happen, unless we skipped populating + if populate: + msg.error('Did not find metadata key {0!s} for file {1!s} - setting to None'.format(mdkey, file)) + metadata[file][mdkey] = None + return metadata + + ## @brief Convenience function to extract a single metadata key for a single file + # @details Retrieves a single metadata item for a single file, returning it directly + # @return Single metadata value + # @param files Files to return metadata for + # @param metadataKey Keys to return + # @param populate If missing key should be generated by calling the population subroutines + # @param flush If cached data should be flushed and the generator rerun + def getSingleMetadata(self, file, metadataKey, populate = True, flush = False): + if not (isinstance(file, str) and isinstance(metadataKey, str)): + raise trfExceptions.TransformInternalException(trfExit.nameToCode('TRF_INTERNAL'), + 'Illegal call to getSingleMetadata function: {0!s} {1!s}'.format(file, metadataKey)) + md = self.getMetadata(files = file, metadataKeys = metadataKey, populate = 
populate, flush = flush) + return md[file][metadataKey] + + + ## @brief Check if metadata is in the cache or generate it if it's missing + # @details + # @return: dictionary of files with metadata, for any unknown keys 'UNDEFINED' is returned + def _readMetadata(self, files, metadataKeys): + msg.debug('Retrieving metadata keys {1!s} for files {0!s}'.format(files, metadataKeys)) + for file in files: + if file not in self._fileMetadata: + self._fileMetadata[file] = {} + for file in files: + # Always try for a simple existence test first before producing misleading error messages + # from metadata populator functions + if '_exists' not in self._fileMetadata[file]: + self._metadataKeys['_exists'](files) + if self._fileMetadata[file]['_exists'] is False: + # N.B. A log ERROR message has been printed by the existence test, so do not repeat that news here + for key in metadataKeys: + if key != '_exists': + self._fileMetadata[file][key] = None + else: + # OK, file seems to exist at least... + for key in metadataKeys: + if key not in self._metadataKeys: + msg.debug('Metadata key {0} is unknown for {1}'.format(key, self.__class__.__name__)) + self._fileMetadata[file][key] = 'UNDEFINED' + else: + if key in self._fileMetadata[file]: + msg.debug('Found cached value for {0}:{1} = {2!s}'.format(file, key, self._fileMetadata[file][key])) + else: + msg.debug('No cached value for {0}:{1}. Calling generator function {2} ({3})'.format(file, key, self._metadataKeys[key].func_name, self._metadataKeys[key])) + try: + # For efficiency call this routine with all files we have + self._metadataKeys[key](files) + except trfExceptions.TransformMetadataException, e: + msg.error('Calling {0!s} raised an exception: {1!s}'.format(self._metadataKeys[key].func_name, e)) + if key not in self._fileMetadata[file]: + msg.warning('Call to function {0} for {1} file {2} failed to populate metadata key {3}'.format(self._metadataKeys[key].__name__, self.__class__.__name__, file, key)) + self._fileMetadata[file][key] = None + msg.debug('Now have {0}:{1} = {2!s}'.format(file, key, self._fileMetadata[file][key])) + + + ## @brief Set metadata values into the cache + # @details Manually sets the metadata cache values to the values given in the + # metadata key dictionary here. + # This is useful for setting values to make checks on file metadata handling. + # @note To really suppress any external function calls that gather metadata be careful + # to also set the @c _exists metadatum to @c True. + # @warning No checks are done on the values or keys given here, so you'd better + # know what you are doing. + # @param files Files to set metadata for (@c None means "all") + # @param metadataKeys Dictionary with metadata keys and values + def _setMetadata(self, files=None, metadataKeys={}): + if files == None: + files = self._value + for file in files: + for k, v in metadataKeys.iteritems(): + msg.debug('Manually setting {0} for file {1} to {2}'.format(k, file, v)) + self._fileMetadata[file][k] = v
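+ # Illustrative sketch, not from the original code (hypothetical values): a test + # can pre-seed the cache so that later getMetadata() calls make no external probes: + # myFile._setMetadata(files=None, metadataKeys={'_exists': True, 'file_size': 1024}) + + + ## @brief Test if certain metadata elements are already cached + # @details Will test for a cached value for all files and all keys + # given, aborting as soon as it finds a single uncached value. 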
+ # @param files Files to check (defaults to all files) + # @param metadataKeys Keys to check (defaults to all keys) + # @return Boolean if all keys are cached for all files + def isCached(self, files = None, metadataKeys = None): + msg.debug('Testing for cached values for files {0} and keys {1}'.format(files, metadataKeys)) + if files is None: + files = self._value + elif isinstance(files, str): + files = (files,) + if metadataKeys is None: + metadataKeys = self._metadataKeys.keys() + elif isinstance(metadataKeys, str): + metadataKeys = (metadataKeys,) + + isCachedFlag = True + for file in files: + for key in metadataKeys: + if key not in self._fileMetadata[file]: + isCachedFlag = False + break + if isCachedFlag == False: + break + + return isCachedFlag + + ## @brief Look for dataset name in dataset#filename Tier0 convention + # @detail At the moment all files must be in the same dataset if it's specified. + # To change this dataset will need to become a per-file metadatum. + # @param @c reset If @c True then forget previous dataset setting. Default is @c True. + # @return @c None. Side effect is to set @c self._metadata. + def _getDatasetFromFilename(self, reset = True): + if reset: + self._dataset = None + newValue = [] + for filename in self._value: + if filename.find('#') > -1: + (dataset, file) = filename.split('#', 1) + newValue.append(file) + msg.debug('Current dataset: {0}; New dataset {1}'.format(self._dataset, dataset)) + if (self._dataset is not None) and (self._dataset != dataset): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_DATASET'), + 'Found inconsistent dataset assignment in argFile setup: %s != %s' % (self._dataset, dataset)) + self._dataset = dataset + else: + newValue.append(filename) + self._value = newValue + + + def __str__(self): + return "%s (Dataset %s, IO %s)" % (self.value, self.dataset, self.io) + + + ## @brief Determines the size of files. + # @details Currently only for statable files (posix fs). Caches the + # @param files List of paths to the files for which the size is determined. + # @return None (internal @c self._fileMetadata cache is updated) + def _getSize(self, files): + for file in files: + if self._urlType is 'posix': + try: + self._fileMetadata[file]['size'] = os.stat(file).st_size + except (IOError, OSError) as e: + msg.error('Got exception {0!s} raised while stating file {1}'.format(e, file)) + self._fileMetadata[file]['size'] = None + else: + # OK, let's see if ROOT can do it... 
+ msg.debug('Calling ROOT TFile.GetSize({0})'.format(file)) + self._fileMetadata[file]['size'] = ROOTGetSize(file) + + + ## @brief File integrity checker + # @details For a 'plain' file, integrity just checks that we can read it + # @param @c files List of paths to the files for which the integrity is determined + # @return None (internal @c self._fileMetadata cache is updated) + def _getIntegrity(self, files): + for file in files: + with open(file) as f: + try: + while True: + chunk = len(f.read(1024*1024)) + msg.debug('Read {0} bytes from {1}'.format(chunk, file)) + if chunk == 0: + break + self._fileMetadata[file]['integrity'] = True + except (OSError, IOError) as e: + msg.error('Got exception {0!s} raised while checking integrity of file {1}'.format(e, file)) + self._fileMetadata[file]['integrity'] = False + + + ## @brief Generate a GUID on demand - no intrinsic for this file type + # @details Use uuid.uuid4() call to generate a GUID + # @note This generation method will be superceeded in any file type which + # actually has an intrinsic GUID (e.g. BS or POOL files) + def _generateGUID(self, files): + for file in files: + msg.debug('Generating a GUID for file {0}'.format(file)) + self._fileMetadata[file]['file_guid'] = str(uuid.uuid4()).upper() + + + ## @brief Try to determine if a file actually exists... + # @details For a posix file, just call stat; for anything else call TFile.Open + # A small optimisation is to retieve the file_size metadatum at the same time. + # @param @c files List of paths to test for existance + # @return None (internal @c self._fileMetadata cache is updated) + def _exists(self, files): + msg.debug('Testing existance for {0}'.format(files)) + for file in files: + if self._urlType is 'posix': + try: + size = os.stat(file).st_size + self._fileMetadata[file]['file_size'] = size + self._fileMetadata[file]['_exists'] = True + msg.debug('POSIX file {0} exists'.format(file)) + except (IOError, OSError) as e: + msg.error('Got exception {0!s} raised while stating file {1} - probably it does not exist'.format(e, file)) + self._fileMetadata[file]['_exists'] = False + else: + # OK, let's see if ROOT can do it... 
+ msg.debug('Calling ROOT TFile.GetSize({0})'.format(file)) + size = ROOTGetSize(file) + if size is None: + self._fileMetadata[file]['_exists'] = False + msg.error('Non-POSIX file {0} could not be opened - probably it does not exist'.format(file)) + else: + msg.debug('Non-POSIX file {0} exists'.format(file)) + self._fileMetadata[file]['file_size'] = size + self._fileMetadata[file]['_exists'] = True + + ## @brief String representation of a file argument + def __str__(self): + return "{0}={1} (Type {2}, Dataset {3}, IO {4})".format(self.name, self.value, self.type, self.dataset, self.io) + +## @brief Athena file class +# @details Never used directly, but is the parent of concrete classes +class argAthenaFile(argFile): + def __init__(self, value = list(), type=None, subtype=None, io = 'output', splitter=',', runarg=True, multipleOK = None, + name=None, executor=list(), mergeTargetSize=-1): + super(argAthenaFile, self).__init__(value=value, subtype=subtype, io=io, type=type, splitter=splitter, runarg=runarg, + multipleOK=multipleOK, name=name, executor=executor, mergeTargetSize=mergeTargetSize) + # Extra metadata known for athena files: + + for key in athFileInterestingKeys: + self._metadataKeys[key] = self._getAthInfo + + ## @brief Workhorse which actually calls AthFile + def _callAthInfo(self, files, doAllFiles, retrieveKeys): + if doAllFiles: + myFiles = self._value + else: + myFiles = files + msg.debug('Will retrieve AthFile info for {0!s}'.format(myFiles)) + athFileMetadata = AthenaFileInfo(myFiles, retrieveKeys=retrieveKeys, timeout=240+30*len(myFiles), defaultrc=None) + if athFileMetadata == None: + raise trfExceptions.TransformMetadataException(trfExit.nameToCode('TRF_METADATA_CALL_FAIL'), 'Call to AthenaFileInfo failed') + for file, fileMetadata in athFileMetadata.iteritems(): + msg.debug('Setting metadata for file {0} to {1}'.format(file, fileMetadata)) + self._fileMetadata[file].update(fileMetadata) + + ## @brief Small wrapper which sets the standard options for doAllFiles and retrieveKeys + def _getAthInfo(self, files): + self._callAthInfo(files, doAllFiles = True, retrieveKeys=athFileInterestingKeys) + + ## @brief Utility to strip arguments which should not be passed to the selfMerge methods + # of our child classes + # @param copyArgs If @c None copy all arguments by default, otherwise only copy the + # listed keys + def _mergeArgs(self, argdict, copyArgs=None): + if copyArgs: + myargdict = {} + for arg in copyArgs: + if arg in argdict: + myargdict[arg] = copy.copy(argdict[arg]) + + else: + myargdict = copy.copy(argdict) + # Never do event count checks for self merging + myargdict['checkEventCount'] = argSubstepBool('False', runarg=False) + if 'athenaopts' in myargdict: + # Need to ensure that "nprocs" is not passed to merger + newopts = [] + for opt in myargdict['athenaopts'].value: + if opt.startswith('--nprocs'): + continue + newopts.append(opt) + myargdict['athenaopts'] = argList(newopts, runarg=False) + return myargdict + + @property + def prodsysDescription(self): + super(argAthenaFile, self).prodsysDescription + return self._desc + + +## @brief ByteStream file class +class argBSFile(argAthenaFile): + + integrityFunction = "returnIntegrityOfBSFile" + + def _getIntegrity(self, files): + for file in files: + try: + rc=call(["AtlListBSEvents.exe", "-c", file], logger=msg, message="Report by AtlListBSEvents.exe: ", timeout=600) + except trfExceptions.TransformTimeoutException: + return False + if rc==0: + self._fileMetadata[file]['integrity'] = True + else: + 
self._fileMetadata[file]['integrity'] = False + + @property + def prodsysDescription(self): + super(argBSFile, self).prodsysDescription + return self._desc + + +## @brief POOL file class. +# @details Works for all POOL files +class argPOOLFile(argAthenaFile): + + integrityFunction = "returnIntegrityOfPOOLFile" + + # trfValidateRootFile is written in an odd way, so timelimit it here. + @timelimited() + def _getIntegrity(self, files): + for file in files: + from PyJobTransforms.trfValidateRootFile import checkFile + rc=checkFile(fileName=file, type='event', requireTree=True) + if rc==0: + self._fileMetadata[file]['integrity'] = True + else: + self._fileMetadata[file]['integrity'] = False + + @property + def prodsysDescription(self): + super(argPOOLFile, self).prodsysDescription + return self._desc + + ## @brief Method which can be used to merge files of this type + # @param output Target filename for this merge + # @param inputs List of files to merge + # @param argdict argdict of the transform + # @note @c argdict is not normally used as this is a @em vanilla merge + def selfMerge(self, output, inputs, argdict={}): + msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict)) + + # First do a little sanity check + for file in inputs: + if file not in self._value: + raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'), + "File {0} is not part of this argument: {1}".format(file, self)) + + from PyJobTransforms.trfExe import athenaExecutor, executorConfig + + ## @note Modify argdict + myargdict = self._mergeArgs(argdict) + + # We need an athenaExecutor to do the merge + # N.B. We never hybrid merge AthenaMP outputs as this would prevent further merging in another + # task (hybrid merged files cannot be further hybrid merged) + myDataDictionary = {'POOL_MRG_INPUT' : argPOOLFile(inputs, type=self.type, io='input'), + 'POOL_MRG_OUTPUT' : argPOOLFile(output, type=self.type, io='output')} + myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True) + myMerger = athenaExecutor(name='POOLMerge_AthenaMP.{0}'.format(self._subtype), conf=myMergeConf, + skeletonFile = 'RecJobTransforms/skeleton.MergePool_tf.py', + inData=set(['POOL_MRG_INPUT']), outData=set(['POOL_MRG_OUTPUT']), perfMonFile = 'ntuple_POOLMerge.pmon.gz') + myMerger.doAll(input=set(['POOL_MRG_INPUT']), output=set(['POOL_MRG_OUTPUT'])) + + # OK, if we got to here with no exceptions, we're in good shape + # Now update our own list of files to reflect the merge + for file in inputs: + self._value.remove(file) + self._value.append(output) + + msg.debug('Post self-merge files are: {0}'.format(self._value)) + self._resetMetadata(inputs + [output]) + + +class argHITSFile(argPOOLFile): + + integrityFunction = "returnIntegrityOfPOOLFile" + + ## @brief Method which can be used to merge HITS files + def selfMerge(self, output, inputs, argdict={}): + msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict)) + + # First do a little sanity check + for file in inputs: + if file not in self._value: + raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'), + "File {0} is not part of this argument: {1}".format(file, self)) + + ## @note Modify argdict + mySubstepName = 'HITSMerge_AthenaMP' + myargdict = self._mergeArgs(argdict, copyArgs=['geometryVersion', 'conditionsTag', 'preExec', 'postExec', 'preInclude', 'postInclude'])
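+ # Illustrative sketch, not from the original code (hypothetical argdict): with + # copyArgs set, only the listed keys survive, so {'conditionsTag': ..., 'maxEvents': ...} + # is reduced to {'conditionsTag': ...}; _mergeArgs also disables event count checks + # and strips any --nprocs athena option before the merge substep runs. + + from PyJobTransforms.trfExe import athenaExecutor, executorConfig + myDataDictionary = {'HITS' : 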
argHITSFile(inputs, type=self.type, io='input'), + 'HITS_MRG' : argHITSFile(output, type=self.type, io='output')} + myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True) + myMerger = athenaExecutor(name = mySubstepName, skeletonFile = 'SimuJobTransforms/skeleton.HITSMerge.py', + conf=myMergeConf, + inData=set(['HITS']), outData=set(['HITS_MRG']), perfMonFile = 'ntuple_HITSMerge.pmon.gz') + myMerger.doAll(input=set(['HITS']), output=set(['HITS_MRG'])) + + # OK, if we got to here with no exceptions, we're in good shape + # Now update our own list of files to reflect the merge + for file in inputs: + self._value.remove(file) + self._value.append(output) + + msg.debug('Post self-merge files are: {0}'.format(self._value)) + self._resetMetadata(inputs + [output]) + + +class argRDOFile(argPOOLFile): + + integrityFunction = "returnIntegrityOfPOOLFile" + + ## @brief Method which can be used to merge RDO files + def selfMerge(self, output, inputs, argdict={}): + msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict)) + + # First do a little sanity check + for file in inputs: + if file not in self._value: + raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'), + "File {0} is not part of this argument: {1}".format(file, self)) + + ## @note Modify argdict + myargdict = self._mergeArgs(argdict) + + from PyJobTransforms.trfExe import athenaExecutor, executorConfig + myDataDictionary = {'RDO' : argRDOFile(inputs, type=self.type, io='input'), + 'RDO_MRG' : argRDOFile(output, type=self.type, io='output')} + myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True) + myMerger = athenaExecutor(name = 'RDOMerge_AthenaMP', skeletonFile = 'RecJobTransforms/skeleton.MergeRDO_tf.py', + conf=myMergeConf, + inData=set(['RDO']), outData=set(['RDO_MRG']), perfMonFile = 'ntuple_RDOMerge.pmon.gz') + myMerger.doAll(input=set(['RDO']), output=set(['RDO_MRG'])) + + # OK, if we got to here with no exceptions, we're in good shape + # Now update our own list of files to reflect the merge + for file in inputs: + self._value.remove(file) + self._value.append(output) + + msg.debug('Post self-merge files are: {0}'.format(self._value)) + self._resetMetadata(inputs + [output]) + + + + + +## @brief TAG file class +# @details Has a different validation routine to ESD/AOD POOL files +class argTAGFile(argPOOLFile): + + integrityFunction = "returnIntegrityOfTAGFile" + + ## @brief Simple integrity check for TAG files + # @details Checks that the event count in the POOLCollectionTree is the same as the AthFile value + def _getIntegrity(self, files): + for file in files: + from PyJobTransforms.trfFileUtils import NTUPEntries + eventCount = NTUPEntries(file, ['POOLCollectionTree']) + if eventCount is None: + msg.error('Got a bad event count for the POOLCollectionTree in {0}: {1}'.format(file, eventCount)) + self._fileMetadata[file]['integrity'] = False + return + if eventCount != self.getSingleMetadata(file, 'nentries'): + msg.error('Event count for {0} from POOLCollectionTree disagrees with AthFile: {1} != {2}'.format(file, eventCount, self.getSingleMetadata(file, 'nentries'))) + self._fileMetadata[file]['integrity'] = False + return + self._fileMetadata[file]['integrity'] = True
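+ # Illustrative sketch, not from the original code (hypothetical numbers): the + # check above marks a TAG file good only when both counts agree, e.g. a + # POOLCollectionTree count of 500 must match the cached 'nentries' of 500. + + ## @brief Method which can be used to merge files of this type + # @param output Target filename for this merge + # @param inputs List of files to merge + # @param argdict argdict of the transform + # @note @c argdict is not normally used as this is a @em 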
vanilla merge + def selfMerge(self, output, inputs, argdict={}): + msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict)) + + # First do a little sanity check + for file in inputs: + if file not in self._value: + raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'), + "File {0} is not part of this argument: {1}".format(file, self)) + + from PyJobTransforms.trfExe import tagMergeExecutor, executorConfig + + ## @note Modify argdict + myargdict = self._mergeArgs(argdict) + + # We need a tagMergeExecutor to do the merge + myDataDictionary = {'TAG_MRG_INPUT' : argTAGFile(inputs, type=self.type, io='input'), + 'TAG_MRG_OUTPUT' : argTAGFile(output, type=self.type, io='output')} + myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True) + myMerger = tagMergeExecutor(name='TAGMerge_AthenaMP.{0}'.format(self._subtype), exe = 'CollAppend', + conf=myMergeConf, + inData=set(['TAG_MRG_INPUT']), outData=set(['TAG_MRG_OUTPUT']),) + myMerger.doAll(input=set(['TAG_MRG_INPUT']), output=set(['TAG_MRG_OUTPUT'])) + + # OK, if we got to here with no exceptions, we're in good shape + # Now update our own list of files to reflect the merge + for file in inputs: + self._value.remove(file) + self._value.append(output) + + msg.debug('Post self-merge files are: {0}'.format(self._value)) + self._resetMetadata(inputs + [output]) + + + @property + def prodsysDescription(self): + super(argTAGFile, self).prodsysDescription + return self._desc + + +## @brief Data quality histogram file class +class argHISTFile(argFile): + + integrityFunction = "returnIntegrityOfHISTFile" + + def __init__(self, value=list(), io = 'output', type=None, subtype=None, splitter=',', runarg=True, countable=True, multipleOK = None, + name=None): + super(argHISTFile, self).__init__(value=value, io=io, type=type, subtype=subtype, splitter=splitter, runarg=runarg, multipleOK=multipleOK, + name=name) + + # Make events optional for HISTs (can be useful for HIST_AOD, HIST_ESD before hist merging) + if countable: + self._metadataKeys.update({ + 'nentries': self._getNumberOfEvents + }) + + ## @brief There is no integrity check for HIST files - return 'UNDEFINED' + def _getIntegrity(self, files): + for file in files: + self._fileMetadata[file]['integrity'] = 'UNDEFINED' + + + def _getNumberOfEvents(self, files): + for file in files: + try: + self._fileMetadata[file]['nentries'] = HISTEntries(file) + except trfExceptions.TransformTimeoutException: + msg.error('Timeout counting events for {0}'.format(file)) + + @property + def prodsysDescription(self): + super(argHISTFile, self).prodsysDescription + return self._desc + + +## @brief NTUP (plain ROOT) file class +# @details Used for ATLAS D3PDs +class argNTUPFile(argFile): + + integrityFunction = "returnIntegrityOfNTUPFile" + + def __init__(self, value=list(), io = 'output', type=None, subtype=None, splitter=',', treeNames=None, runarg=True, multipleOK = None, + name=None, mergeTargetSize=-1): + super(argNTUPFile, self).__init__(value=value, io=io, type=type, subtype=subtype, splitter=splitter, runarg=runarg, multipleOK=multipleOK, + name=name, mergeTargetSize=mergeTargetSize) + self._treeNames=treeNames + + self._metadataKeys.update({ + 'nentries': self._getNumberOfEvents, + 'file_guid': self._generateGUID, + 'integrity': self._getIntegrity, + }) + + + def _getNumberOfEvents(self, files): + msg.debug('Retrieving event count for NTUP files {0}'.format(files)) + if self._treeNames is None: + msg.debug('treeNames is set to None - 
## @brief NTUP (plain ROOT) file class +# @details Used for ATLAS D3PDs +class argNTUPFile(argFile): + + integrityFunction = "returnIntegrityOfNTUPFile" + + def __init__(self, value=list(), io = 'output', type=None, subtype=None, splitter=',', treeNames=None, runarg=True, multipleOK = None, + name=None, mergeTargetSize=-1): + super(argNTUPFile, self).__init__(value=value, io=io, type=type, subtype=subtype, splitter=splitter, runarg=runarg, multipleOK=multipleOK, + name=name, mergeTargetSize=mergeTargetSize) + self._treeNames=treeNames + + self._metadataKeys.update({ + 'nentries': self._getNumberOfEvents, + 'file_guid': self._generateGUID, + 'integrity': self._getIntegrity, + }) + + + def _getNumberOfEvents(self, files): + msg.debug('Retrieving event count for NTUP files {0}'.format(files)) + if self._treeNames is None: + msg.debug('treeNames is set to None - event count undefined for this NTUP') + for file in files: + self._fileMetadata[file]['nentries'] = 'UNDEFINED' + else: + for file in files: + try: + self._fileMetadata[file]['nentries'] = NTUPEntries(fileName=file, treeNames=self._treeNames) + except trfExceptions.TransformTimeoutException: + msg.error('Timeout counting events for {0}'.format(file)) + + + def _getIntegrity(self, files): + for file in files: + from PyJobTransforms.trfValidateRootFile import checkFile + rc=checkFile(fileName=file, type='basket', requireTree=False) + if rc==0: + self._fileMetadata[file]['integrity'] = True + else: + self._fileMetadata[file]['integrity'] = False + + + def selfMerge(self, output, inputs, argdict={}): + msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict)) + + # First do a little sanity check + for file in inputs: + if file not in self._value: + raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'), + "File {0} is not part of this argument: {1}".format(file, self)) + + from PyJobTransforms.trfExe import NTUPMergeExecutor, executorConfig + + ## @note Modify argdict + myargdict = self._mergeArgs(argdict) + + # We need a NTUPMergeExecutor to do the merge + myDataDictionary = {'NTUP_MRG_INPUT' : argNTUPFile(inputs, type=self.type, treeNames=self._treeNames, io='input'), + 'NTUP_MRG_OUTPUT' : argNTUPFile(output, type=self.type, treeNames=self._treeNames, io='output')} + myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True) + myMerger = NTUPMergeExecutor(name='NTUPMerge_AthenaMP.{0}'.format(self._subtype), conf=myMergeConf, + inData=set(['NTUP_MRG_INPUT']), outData=set(['NTUP_MRG_OUTPUT'])) + myMerger.doAll(input=set(['NTUP_MRG_INPUT']), output=set(['NTUP_MRG_OUTPUT'])) + + # OK, if we got to here with no exceptions, we're in good shape + # Now update our own list of files to reflect the merge + for file in inputs: + self._value.remove(file) + self._value.append(output) + + msg.debug('Post self-merge files are: {0}'.format(self._value)) + self._resetMetadata(inputs + [output]) + + + @property + def prodsysDescription(self): + super(argNTUPFile, self).prodsysDescription + return self._desc + + + +## @brief TarBZ filetype +class argBZ2File(argFile): + def _getIntegrity(self, files): + for file in files: + # bz2 only supports 'with' from python 2.7 + try: + f = bz2.BZ2File(file, 'r') + while True: + chunk = len(f.read(1024*1024)) + msg.debug('Read {0} bytes from {1}'.format(chunk, file)) + if chunk == 0: + break + self._fileMetadata[file]['integrity'] = True + f.close() + except (OSError, IOError) as e: + msg.error('Got exception {0!s} raised while checking integrity of file {1}'.format(e, file)) + self._fileMetadata[file]['integrity'] = False + + + @property + def prodsysDescription(self): + super(argBZ2File, self).prodsysDescription + return self._desc
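argBZ2File's integrity test is simply "can the archive be decompressed to the end", read in 1 MB chunks. The same check as a free function (a sketch; on Python >= 2.7 bz2.BZ2File can also be used as a context manager, as the comment above notes):

```python
import bz2

def bz2Readable(fileName, chunkSize=1024*1024):
    # Decompress the whole archive in fixed-size chunks; an empty read
    # means clean EOF, any bz2/IO error means a broken file
    try:
        f = bz2.BZ2File(fileName, 'r')
        try:
            while f.read(chunkSize):
                pass
        finally:
            f.close()
        return True
    except (OSError, IOError):
        return False
```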
## @brief Class which defines a special input file format used in FTK simulation +class argFTKIPFile(argBZ2File): + def __init__(self, value=list(), io = 'output', type=None, splitter=',', runarg=True, multipleOK=None, name=None): + super(argFTKIPFile, self).__init__(value=value, io=io, type=type, splitter=splitter, runarg=runarg, multipleOK=multipleOK, + name=name) + self._metadataKeys.update({ + 'nentries': self._getNumberOfEvents + }) + + def _getNumberOfEvents(self, files): + for file in files: + try: + eventCount = 0 + f = bz2.BZ2File(file, 'r') + for line in f: + if line.startswith('F'): + eventCount += 1 + self._fileMetadata[file]['nentries'] = eventCount + except (OSError, IOError) as e: + msg.error('Event count for file {0} failed: {1!s}'.format(file, e)) + self._fileMetadata[file]['nentries'] = None + + @property + def prodsysDescription(self): + super(argFTKIPFile, self).prodsysDescription + return self._desc + +## @brief HEP ASCII file +# @details An ASCII file representation of HEP events +class argHepEvtAsciiFile(argFile): + def __init__(self, value=list(), io = 'output', type='txt_evt', splitter=',', runarg=True, multipleOK=None, name=None): + super(argHepEvtAsciiFile, self).__init__(value=value, io=io, type=type, splitter=splitter, runarg=runarg, + multipleOK=multipleOK, name=name) + self._metadataKeys.update({ + 'nentries': self._getNumberOfEvents + }) + + def _getNumberOfEvents(self, files): + for file in files: + try: + eventCount = 0 + f = open(file, 'r') + for line in f: + if len(line.split(" "))==3: + eventCount += 1 + self._fileMetadata[file]['nentries'] = eventCount + except (OSError, IOError) as e: + msg.error('Event count for file {0} failed: {1!s}'.format(file, e)) + self._fileMetadata[file]['nentries'] = None
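The two ASCII event counters above share one pattern: stream the file line by line and count lines that pass a cheap structural test (lines starting with 'F' for FTKIP, lines with exactly three space-separated tokens for HepEvt headers). Expressed generically, under the same assumptions about the file layouts (helper name hypothetical):

```python
def countMatchingLines(fileName, predicate):
    # Generic line-counting helper (sketch): predicate decides which
    # lines mark the start of an event
    with open(fileName) as f:
        return sum(1 for line in f if predicate(line))

# HepEvt ASCII: an event header line has exactly three fields
# hepEvtCount = countMatchingLines('events.txt', lambda l: len(l.split(' ')) == 3)
```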
## @brief Base class for substep arguments +# @details Sets up a dictionary with {substep1: value1, substep2: value2, ...} +# In this base class we don't put any restrictions on the values - they will be specialised +# in children. +class argSubstep(argument): + + ## @brief argSubstep constructor + # @param defaultSubstep Default substep to use when no specific substep has been given - this should be + # set to @c first for arguments that apply, by default, to the first executor in the chain (e.g., @c maxEvents) + def __init__(self, value = None, runarg = True, name = None, defaultSubstep = 'all', separator = ':'): + self._defaultSubstep = defaultSubstep + self._separator = separator + super(argSubstep, self).__init__(value, runarg, name) + + # Reset getter + @property + def value(self): + return self._value + + # The default setter for the substep class + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstep from {0!s} (type {1})'.format(value, type(value))) + if value is None: + self._value = {} + elif isinstance(value, str): + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: subStepValue} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse, so we go through them one by one + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + subStep, subStepValue = self._parseStringAsSubstep(item) + self._value[subStep] = subStepValue + elif isinstance(value, dict): + self._value = value + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + + + ## @brief Parse a string for substep:value format + # @details If the string matches the substep specifier regexp then we return the two parts; + # if not then the substep is returned as @c self._defaultSubstep, with the entire string passed + # back as the value + # @param string The string which should be parsed + def _parseStringAsSubstep(self, string): + subStepMatch = re.match(r'([a-zA-Z0-9]+)' + self._separator + r'(.*)', string) + if subStepMatch: + subStep = subStepMatch.group(1) + subStepValue = subStepMatch.group(2) + else: + subStep = self._defaultSubstep + subStepValue = string + msg.debug('Parsed {0} as substep {1}, argument {2}'.format(string, subStep, subStepValue)) + return subStep, subStepValue + + + ## @brief Return the value of this substep arg for an executor with the given parameters + # @param name Executor name + # @param substep Executor substep nickname + # @param first Boolean flag set true if this is the first executor in the chain + # @param exe Executor instance, from which 'name', 'substep' and 'first' can be taken. + def returnMyValue(self, name=None, substep=None, first=False, exe=None): + if exe: + name = exe.name + substep = exe.substep + first = exe.conf.firstExecutor + + value = None + ## @note First we see if we have an explicit name or substep match, then a special 'first' or 'default' match + if name in self._value.keys(): + value = self._value[name] + elif substep in self._value.keys(): + value = self._value[substep] + elif first and 'first' in self._value.keys(): + value = self._value['first'] + elif 'default' in self._value.keys(): + value = self._value['default'] + + ## @note Now see how we should handle an 'all', if it exists. + # This means using it as the value, if that's not yet defined or, + # if it is defined and we have a mutable seq type, using it as + # a prefix to the more specific setting (this behaviour requested here: + # https://its.cern.ch/jira/browse/ATLASJT-24) + ## @note Defining all: for a key which is not composable (like a list) + # doesn't make much sense and, in this case, the specific value is allowed + # to trump the all: + if 'all' in self._value.keys(): + if value is None: + value = self._value['all'] + elif isinstance(value, list): + value = self._value['all'] + value + + msg.debug('From substep argument {myvalue} picked value "{value}" for {name}, {substep}, first={first}'.format(myvalue=self._value, value=value, name=name, substep=substep, first=first)) + + return value + + @property + def prodsysDescription(self): + self._desc = {'type' : 'Substep'} + return self._desc
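The lookup precedence in returnMyValue (explicit name, then substep nickname, then 'first', then 'default', with 'all' acting as the value itself or as a list prefix) is easiest to see worked through. A self-contained sketch mirroring that logic with hypothetical values, not the real class:

```python
def resolve(values, name=None, substep=None, first=False):
    # Mirror of returnMyValue's lookup order (illustration only)
    value = None
    if name in values:
        value = values[name]
    elif substep in values:
        value = values[substep]
    elif first and 'first' in values:
        value = values['first']
    elif 'default' in values:
        value = values['default']
    # 'all' acts as the value itself, or as a prefix for list values
    if 'all' in values:
        if value is None:
            value = values['all']
        elif isinstance(value, list):
            value = values['all'] + value
    return value

values = {'all': ['common()'], 'RAWtoESD': ['special()'], 'default': ['fallback()']}
print(resolve(values, name='RAWtoESD'))  # ['common()', 'special()']
print(resolve(values, name='ESDtoAOD'))  # ['common()', 'fallback()']
```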
## @brief Argument class for substep lists, suitable for preExec/postExec +# @details substep is followed by a ':' then the python fragments. +# The substep is validated and set as the key in a dictionary with the value being the python following the ':'. +# - If no substep (r2e, e2e, etc) is specified, it will be parsed for 'all'. +# @note substep can be either the substep property of an executor or the executor's name. +# Thus r2e:something is the same as RAWtoESD:something. +class argSubstepList(argSubstep): + + ## @brief argSubstepList constructor + # @details Adds the optional splitter argument to the base argSubstep class + # @param splitter Character used to split the string into multiple list elements + def __init__(self, value = None, runarg = True, name = None, defaultSubstep = 'all', splitter = None, separator=':'): + self._splitter = splitter + super(argSubstepList, self).__init__(value, runarg, name, defaultSubstep, separator) + + + # Reset getter + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstep from {0!s} (type {1})'.format(value, type(value))) + if value is None: + self._value = {} + elif isinstance(value, str): + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: subStepValue} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + subStep, subStepValue = self._parseStringAsSubstep(item) + if subStep in self._value: + self._value[subStep].extend(subStepValue) + else: + self._value[subStep] = subStepValue + elif isinstance(value, dict): + for k, v in value.iteritems(): + if not isinstance(k, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary key {0!s} for substep is not a string'.format(k)) + if not isinstance(v, list): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary value {0!s} for substep is not a list'.format(v)) + self._value = value + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + + ## @brief Specialist parser for lists, which applies the splitter string, if defined + # @return Tuple of substep plus a list of strings + def _parseStringAsSubstep(self, value): + subStep, subStepValue = super(argSubstepList, self)._parseStringAsSubstep(value) + if self._splitter: + return subStep, subStepValue.split(self._splitter) + else: + return subStep, [subStepValue]
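With a splitter defined, each substep string yields a list, and repeated substeps extend rather than overwrite. A compact sketch of the parsing behaviour (function name hypothetical; the regexp shape matches _parseStringAsSubstep above):

```python
import re

def parseSubstepItem(item, splitter=',', separator=':', default='all'):
    # Split 'substep:payload' and then apply the splitter to the payload
    m = re.match(r'([a-zA-Z0-9]+)' + separator + r'(.*)', item)
    subStep, val = (m.group(1), m.group(2)) if m else (default, item)
    return subStep, (val.split(splitter) if splitter else [val])

print(parseSubstepItem('RAWtoESD:frag1.py,frag2.py'))
# -> ('RAWtoESD', ['frag1.py', 'frag2.py'])
print(parseSubstepItem('common.py'))
# -> ('all', ['common.py'])
```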
## @brief Boolean substep argument +class argSubstepBool(argSubstep): + + # Reset getter + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstep from {0!s} (type {1})'.format(value, type(value))) + if value is None: + self._value = {} + elif isinstance(value, str): + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: strToBool(subStepValue)} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + subStep, subStepValue = self._parseStringAsSubstep(item) + self._value[subStep] = strToBool(subStepValue) + elif isinstance(value, dict): + for k, v in value.iteritems(): + if not isinstance(k, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary key {0!s} for substep is not a string'.format(k)) + if not isinstance(v, bool): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary value {0!s} for substep is not a bool'.format(v)) + self._value = value + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + + +## @brief Int substep argument +class argSubstepInt(argSubstep): + + # Reset getter + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstep from {0!s} (type {1})'.format(value, type(value))) + try: + if value is None: + self._value = {} + elif isinstance(value, str): + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: int(subStepValue)} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + subStep, subStepValue = self._parseStringAsSubstep(item) + self._value[subStep] = int(subStepValue) + elif isinstance(value, dict): + for k, v in value.iteritems(): + if not isinstance(k, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary key {0!s} for substep is not a string'.format(k)) + if not isinstance(v, (int, long)): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary value {0!s} for substep is not an int'.format(v)) + self._value = value + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + except ValueError, e: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert substep value {0} to int'.format(value)) + + + +## @brief Float substep argument +class argSubstepFloat(argSubstep): + + def __init__(self, value=None, min=None, max=None, runarg=True, name=None): + self._min = min + self._max = max + self._desc = {} + super(argSubstepFloat, self).__init__(value = value, runarg = runarg, name=name) + + @property + def prodsysDescription(self): + self._desc = {'type' : 'argSubstepFloat'} + if self._min: + self._desc['min'] = self._min + if self._max: + self._desc['max'] = self._max + return self._desc + + + # Reset getter + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstep from {0!s} (type {1})'.format(value, type(value))) + try: + if value is None: + self._value = {} + elif isinstance(value, str): + if (self._min != None and float(value) < self._min) or (self._max != None and float(value) > self._max): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_OUT_OF_RANGE'), + 'argFloat value out of range: %s is not between %s and %s' % + (value, self._min, self._max)) + else: + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: float(subStepValue)} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + if
(self._min != None and float(item) < self._min) or (self._max != None and float(item) > self._max): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_OUT_OF_RANGE'), + 'argFloat value out of range: %s is not between %s and %s' % + (item, self._min, self._max)) + subStep, subStepValue = self._parseStringAsSubstep(item) + self._value[subStep] = float(subStepValue) + elif isinstance(value, dict): + for k, v in value.iteritems(): + if not isinstance(k, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary key {0!s} for substep is not a string'.format(k)) + if not isinstance(v, float): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Dictionary value {0!s} for substep is not a float'.format(v)) + if (self._min != None and float(v) < self._min) or (self._max != None and float(v) > self._max): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_OUT_OF_RANGE'), + 'argFloat value out of range: %s is not between %s and %s' % + (v, self._min, self._max)) + self._value = value + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + except ValueError, e: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert substep value {0} to float'.format(value))
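The steering class that follows parses strings of the form substep:{in/out}{+/-}DATATYPE into tuples. A standalone sketch of that transformation (function name hypothetical; the regexp is the one used by _parseSteeringString):

```python
import re

def parseSteering(spec):
    # 'RAWtoESD:in-RDO,in+RDO_TRIG'
    #   -> {'RAWtoESD': [('in', '-', 'RDO'), ('in', '+', 'RDO_TRIG')]}
    substep, instructions = spec.split(':', 1)
    parsed = []
    for item in instructions.split(','):
        m = re.match(r'(in|out)(\+|\-)([A-Z_]+)$', item)
        if not m:
            raise ValueError('bad steering item: %s' % item)
        parsed.append((m.group(1), m.group(2), m.group(3)))
    return {substep: parsed}

print(parseSteering('RAWtoESD:in-RDO,in+RDO_TRIG'))
```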
## @brief Special argument class to hold steering information +class argSubstepSteering(argSubstep): + # Reset getter + @property + def value(self): + return self._value + + ## @details For strings passed to the setter we expect the format to be @c substep:{in/out}{+/-}DATATYPE + # This is then cast into a dictionary of tuples {substep: [('in/out', '+/-', DATATYPE), ...], ...} + @value.setter + def value(self, value): + msg.debug('Attempting to set argSubstepSteering from {0!s} (type {1})'.format(value, type(value))) + if value is None: + self._value = {} + elif isinstance(value, dict): + # OK, this should be the directly settable dictionary - but do a check of that + for k, v in value.iteritems(): + if not isinstance(k, str) or not isinstance(v, list): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert dict {0!s} to argSubstepSteering'.format(value)) + for subv in v: + if not isinstance(subv, (list, tuple)) or len(subv) != 3 or subv[0] not in ('in', 'out') or subv[1] not in ('+', '-'): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert dict {0!s} to argSubstepSteering'.format(value)) + self._value = value + elif isinstance(value, str): + # Single string value case + subStep, subStepValue = self._parseStringAsSubstep(value) + self._value = {subStep: self._parseSteeringString(subStepValue)} + elif isinstance(value, (list, tuple)): + # This is a list of strings to parse + self._value = {} + for item in value: + if not isinstance(item, str): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert list item {0!s} to substep (should be a string)'.format(item)) + subStep, subStepValue = self._parseStringAsSubstep(item) + self._value.update({subStep: self._parseSteeringString(subStepValue)}) + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Setter value {0!s} (type {1}) for substep argument cannot be parsed'.format(value, type(value))) + + def _parseSteeringString(self, ivalue): + retvalue = [] + for subvalue in ivalue.split(','): + matchedParts = re.match(r'(in|out)(\+|\-)([A-Z_]+)$', subvalue) + if not matchedParts: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert string {0!s} to argSubstepSteering'.format(subvalue)) + retvalue.append((matchedParts.group(1), matchedParts.group(2), matchedParts.group(3))) + return retvalue + + +class trfArgParser(argparse.ArgumentParser): + + ## @brief Subclassing argparse. + # @details Changed the default methods for add_arguments and parse_args. + # This is so that the help functions and the namespace object can be more useful. + # @note @c self._helpString is a dictionary where the key-value pairs are the argument name and the help text. + # @note @c self._argClass is a dictionary from which the 'type' of an argument, i.e. the class that it uses, can be extracted. + def __init__(self, *args, **kwargs): + self._helpString = {} + self._argClass = {} + self._argGroups = {} + super(trfArgParser, self).__init__(*args, **kwargs) + + def add_argument(self, *args, **kwargs): + # Convert argument name to argparse standard + argName = cliToKey(args[0]) + msg.debug('found arg name {0}'.format(argName)) + + # Prevent a crash if this argument already exists (there are valid use cases for 'grabbing' an + # argument, so this is INFO, not WARNING) + if argName in self._argClass: + msg.info('Double definition of argument {0} - ignored'.format(argName)) + return + + # if there is a help function defined for the argument then populate the helpString dict + if 'help' in kwargs: + self._helpString[argName] = kwargs['help'] # if the help option is present for the argument then put it into the helpString dict key = argument name, value = help + else: + self._helpString[argName] = None + if 'type' in kwargs: + self._argClass[argName] = kwargs['type'] + else: + self._argClass[argName] = None + + # Remove kwargs which are not understood by ArgumentParser.add_argument() + strippedArgs = {} + for arg in ('group',): + if arg in kwargs: + strippedArgs[arg] = kwargs.pop(arg) + + # Optionally add an argument to an argparse argument group + if 'group' in strippedArgs: + if strippedArgs['group'] in self._argGroups: + msg.debug('Adding argument to group {0}: ({1}; {2})'.format(strippedArgs['group'], args, kwargs)) + self._argGroups[strippedArgs['group']].add_argument(*args, **kwargs) + else: + msg.warning('Argument group {0} not defined - adding argument to main parser'.format(strippedArgs['group'])) + msg.debug('Adding argument: ({0}; {1})'.format(args, kwargs)) + super(trfArgParser, self).add_argument(*args, **kwargs) + else: + msg.debug('Adding argument: ({0}; {1})'.format(args, kwargs)) + super(trfArgParser, self).add_argument(*args, **kwargs) + + def getProdsysDesc(self): + desc = {} + for name, argClass in self._argClass.iteritems(): + if type(argClass)!=type(None): + desc[name] = argClass().prodsysDescription + if self._helpString[name]: + desc[name].update({'help': self._helpString[name]}) + return desc + + ## Define an argparse argument group for the main parser to use + def defineArgGroup(self, *args): + # Get an argparse group + if args[0] in self._argGroups: + msg.warning('Argument group %s already exists' % args[0]) + return + self._argGroups[args[0]] = self.add_argument_group(*args)
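In practice the group and type extensions are used together like this - a sketch mirroring the tea-making test options that appear later in this patch (argFactory wraps an argument class so argparse can instantiate it; exact runtime behaviour of cliToKey is assumed):

```python
parser = trfArgParser(description='toy transform')
parser.defineArgGroup('Tea', 'Tea Making Options')
parser.add_argument('--cupsOfTea', group='Tea',
                    type=argFactory(trfArgClasses.argInt),
                    help='Number of cups of tea requested (int)')
# The help text is cached by the subclassed add_argument:
print(parser.getHelpString('cupsOfTea'))
# -> 'Number of cups of tea requested (int)'
```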
## Return the help string for a given argument + def getHelpString(self, argument): + try: + return(self._helpString[argument]) + except KeyError: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_KEY_ERROR'), 'no help string available for argument %s' % argument) + return None + + ## @brief Return a list of all arguments understood by this transform in prodsys style + # @details Arguments which are irrelevant for production are removed and the '--' is added back on + def dumpArgs(self): + keyArray = [ '--' + str(key) for key in self._helpString.keys() if key not in ('h', 'verbose', 'loglevel', 'dumpargs', 'argdict') ] + keyArray.sort() + print 'ListOfDefaultPositionalKeys={0}'.format(keyArray) + + ## Getter for argument list + @property + def allArgs(self): + return self._helpString.keys() + + + ## @brief Call ArgumentParser's parse_args, then concatenate values + # @details Sets up the standard argparse namespace, then uses a special + # treatment for lists (arising from nargs='+'), where values + # are appropriately concatenated and a single object is returned + # @return argparse namespace object + def parse_args(self, args = None, namespace = None): + if namespace: + super(trfArgParser, self).parse_args(args = args, namespace = namespace) + else: + namespace = super(trfArgParser, self).parse_args(args = args) + for k, v in namespace.__dict__.iteritems(): + msg.debug('Treating key %s (%s)' % (k, v)) + if isinstance(v, list): + # We build on the v[0] instance as this contains the correct metadata + # and object references for this instance (shallow copying can + # mess up object references and deepcopy throws exceptions!) + newValueObj = v[0] + msg.debug('Started with: %s = %s' % (type(newValueObj), newValueObj)) + if isinstance(v[0], argSubstep): + # Make sure you do not have a reference to the original value - this is a deeper copy + newValues = dictSubstepMerge(v[0].value, {}) + elif isinstance(v[0].value, list): + newValues = v[0].value + elif isinstance(v[0].value, dict): + newValues = v[0].value + else: + newValues = [v[0].value,] + for valueObj in v[1:]: + msg.debug('Value Object: %s = %s' % (type(valueObj), valueObj)) + if isinstance(v[0], argSubstep): + # Special merger for lists attached to substeps + newValues = dictSubstepMerge(newValues, valueObj.value) + elif isinstance(valueObj.value, list): + # General lists are concatenated + newValues.extend(valueObj.value) + elif isinstance(valueObj.value, dict): + # General dictionaries are merged + newValues.update(valueObj.value) + else: + newValues.append(valueObj.value) + newValueObj.value = newValues + namespace.__dict__[k] = newValueObj + msg.debug('Set to %s' % newValueObj.value) + + return namespace
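The net effect of this parse_args is that repeated options collapse into a single argument object. For substep arguments the fold uses the dictSubstepMerge helper defined just below; a usage sketch (command-line fragments hypothetical):

```python
# --preExec all:common() RAWtoESD:special()  (nargs='+' gives a list of
# argSubstepList objects) is folded by parse_args into ONE argSubstepList:
merged = dictSubstepMerge({'all': ['common()']}, {'RAWtoESD': ['special()']})
print(merged)  # {'all': ['common()'], 'RAWtoESD': ['special()']}

# Repeats of the same substep concatenate their lists:
print(dictSubstepMerge({'all': ['a.py']}, {'all': ['b.py']}))
# -> {'all': ['a.py', 'b.py']}
```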
## @brief Small utility to convert a string value to a boolean +def strToBool(string): + try: + if string.lower() == 'false': + return False + elif string.lower() == 'true': + return True + else: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert value {0} to bool'.format(string)) + except AttributeError: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), 'Failed to convert value {0} to bool'.format(string)) + +## @brief special dictionary merger which is used for substep type arguments +# @details Uses set class to get the unique list of keys. Any key present in only one dictionary +# is unaltered. If the values are lists they are concatenated, otherwise the values are picked +# from the dictionary in which it is set. +# @param @c dict1 First dictionary +# @param @c dict2 Second dictionary +# @return Merged dictionary +# @raise trfExceptions.TransformArgException If both dictionaries contain non-list keys which +# are not the same. +def dictSubstepMerge(dict1, dict2): + mergeDict = {} + allKeys = set(dict1.keys()) | set(dict2.keys()) + # Find the value type - lists are special... + listType = False + if len(dict1) > 0: + if isinstance(dict1.values()[0], list): + listType = True + elif len(dict2) > 0: + if isinstance(dict2.values()[0], list): + listType = True + if listType: + for key in allKeys: + mergeDict[key] = dict1.get(key, []) + dict2.get(key, []) + else: + for key in allKeys: + if key in dict1 and key in dict2: + # Don't really know what to do if these clash... + if dict1[key] != dict2[key]: + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'), + 'Merging substep arguments found clashing values for substep {0}: {1}!={2}'.format(key, dict1[key], dict2[key])) + mergeDict[key] = dict1[key] + elif key in dict1: + mergeDict[key] = dict1[key] + else: + mergeDict[key] = dict2[key] + + return mergeDict + + diff --git a/Tools/PyJobTransforms/python/trfArgs.py b/Tools/PyJobTransforms/python/trfArgs.py new file mode 100644 index 0000000000000000000000000000000000000000..6dd74546230172bd4f0279840364caa536a3df60 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfArgs.py @@ -0,0 +1,461 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfArgs +# @brief Standard arguments supported by trf infrastructure +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfArgs.py 613109 2014-08-22 16:55:12Z graemes $ + +import argparse +import logging +msg = logging.getLogger(__name__) +import unittest +import pickle +import os + +import PyJobTransforms.trfArgClasses as trfArgClasses +from PyJobTransforms.trfArgClasses import argFactory + +from PyJobTransforms.trfLogger import stdLogLevels +from PyJobTransforms.trfDecorators import silent +from PyJobTransforms.trfExitCodes import trfExit + +## Add standard transform arguments to an argparse ArgumentParser +def addStandardTrfArgs(parser): + parser.add_argument('--verbose', '--debug', action='store_true', help='Set transform loglevel to DEBUG') + parser.add_argument('--loglevel', choices=stdLogLevels.keys(), help='Set transform logging level') + parser.add_argument('--argdict', metavar='FILE', help='File containing pickled argument dictionary') + parser.add_argument('--argJSON', '--argjson', metavar='FILE', help='File containing JSON serialised argument dictionary') + parser.add_argument('--dumpargs', action='store_true', help='Dump transform arguments and exit') + parser.add_argument('--showGraph', action='store_true', help='Show multi-step transform graph, then exit') + parser.add_argument('--showPath', action='store_true', help='Show execution path only, then exit') + parser.add_argument('--showSteps', action='store_true', help='Show list of executor steps only, then exit') + parser.add_argument('--dumpPickle', metavar='FILE', help='Interpret command line arguments and write them out as a pickle file') + parser.add_argument('--dumpJSON', metavar='FILE', help='Interpret command line arguments and write them out as a JSON file') + parser.add_argument('--orphanKiller', action='store_true', help="Kill all orphaned children at the end of a job (that is, sharing the transform's pgid, but with ppid=1)."
+ "Beware, this is potentially dangerous in a a batch environment") + parser.add_argument('--reportName', type=argFactory(trfArgClasses.argString, runarg=False), + help='Base name for job reports (default name is "jobReport" for most reports, but "metadata" for classic prodsys XML)') + parser.add_argument('--reportType', type=argFactory(trfArgClasses.argList, runarg=False), nargs='+', metavar='TYPE', + help='Job reports to produce: valid values are "text", "json", "classic", "pilotPickle" and "gpickle"') + parser.add_argument('--execOnly', action='store_true', + help='Exec the first substep only, replacing the transform process (no job reports and the return code will be from the substep process)') + parser.add_argument('--env', type=argFactory(trfArgClasses.argSubstepList, runarg=False), metavar='substep:KEY=VALUE', nargs='+', + help='Explicitly set environment variables for an executor (default is all substeps).' + ' N.B. this setting is passed to the shell, so reference to shell variables is allowed, e.g.' + ' KEY=VALUE:$KEY') + parser.add_argument('--imf', type=argFactory(trfArgClasses.argSubstepBool, runarg=False), metavar="substep:BOOL", nargs='+', + help='Manually include/exclude the Intel IMF maths library ' + '(otherwise this is disabled for base releases < 17.7, enabled otherwise)') + parser.add_argument('--tcmalloc', type=argFactory(trfArgClasses.argSubstepBool, runarg=False), metavar="substep:BOOL", nargs='+', + help='Switch preload of the tcmalloc library (disabled by default)') + parser.add_argument('--AMIConfig', '--amiConfig', '--AMI', type=argFactory(trfArgClasses.argString, runarg=False), help='Configure transform with AMI tag') + parser.add_argument('--AMITag', '--amiConfigTag', '--AMIConfigTag', type=argFactory(trfArgClasses.argString), metavar='TAG', + help='AMI tag from which this job was defined - this option simply writes the ' + 'relevant AMI tag value into the output metadata, it does not configure the job (use --AMIConfig for that)') + parser.add_argument('--steering', type=argFactory(trfArgClasses.argSubstepSteering, runarg=False), nargs='+', metavar='substep:{in/out}{+-}DATA', + help='Steer the transform by manipulating the execution graph before the execution path is calculated. ' + 'Format is substep:{in,out}{+-}DATA,{in,out}{+-}DATA,... to modify the substep\'s input/output ' + ' by adding/removing a data type. e.g. RAWtoESD:in-RDO,in+RDO_TRIG would remove RDO and add ' + 'RDO_TRIG to the list of valid input datatypes for the RAWtoESD substep.') + + +## Options related to running athena in general +# TODO: Some way to mask certain options (perExec, e.g.) +## @brief Add standard athena options +# @param parser trfArgParser object +# @param maxEventsDefaultSubstep Special option which can change the default substep for maxEvents (needed by +# some special transforms). +def addAthenaArguments(parser, maxEventsDefaultSubstep='first'): + parser.defineArgGroup('Athena', 'General Athena Options') + parser.add_argument('--athenaopts', group = 'Athena', type=argFactory(trfArgClasses.argList, splitter=' ', runarg=False), metavar='OPT1 OPT2 OPT3', + help='Extra options to pass to athena. Will split on spaces. 
Options starting with "-" must be given as --athenaopts=\'--opt1 --opt2[=foo] ...\'') + parser.add_argument('--command', '-c', group = 'Athena', type=argFactory(trfArgClasses.argString, runarg=False), metavar='COMMAND', + help='Run %(metavar)s before all else') + parser.add_argument('--athena', group = 'Athena', type=argFactory(trfArgClasses.argString, runarg=False), metavar='ATHENA', + help='Use %(metavar)s as the athena executable') + parser.add_argument('--preExec', group = 'Athena', type=argFactory(trfArgClasses.argSubstepList), nargs='+', + metavar='substep:PREEXEC', + help='Python code to execute before main job options are included (can be optionally limited to a single substep)') + parser.add_argument('--preInclude', group = 'Athena', type=argFactory(trfArgClasses.argSubstepList, splitter=','), nargs='+', + metavar='substep:PREINCLUDE', + help='Python configuration fragment to include before main job options (can be optionally limited to a single substep). Will split on commas: frag1.py,frag2.py is understood.') + parser.add_argument('--postExec', group = 'Athena', type=argFactory(trfArgClasses.argSubstepList), nargs='+', + metavar='substep:POSTEXEC', + help='Python code to execute after main job options are included (can be optionally limited to a single substep)') + parser.add_argument('--postInclude', group = 'Athena', type=argFactory(trfArgClasses.argSubstepList, splitter=','), nargs='+', + metavar='substep:POSTINCLUDE', + help='Python configuration fragment to include after main job options (can be optionally limited to a single substep). Will split on commas: frag1.py,frag2.py is understood.') + parser.add_argument('--maxEvents', group='Athena', type=argFactory(trfArgClasses.argSubstepInt, defaultSubstep=maxEventsDefaultSubstep), + nargs='+', metavar='substep:maxEvents', + help='Set maximum events for each processing step (default substep is "{0}")'.format(maxEventsDefaultSubstep)) + parser.add_argument('--skipEvents', group='Athena', type=argFactory(trfArgClasses.argSubstepInt, defaultSubstep='first'), + help='Number of events to skip over in the first processing step') + parser.add_argument('--asetup', group='Athena', type=argFactory(trfArgClasses.argSubstep, runarg=False), nargs='+', metavar='substep:ASETUP', + help='asetup command string to be run before this substep is executed') + parser.add_argument('--eventAcceptanceEfficiency', type=trfArgClasses.argFactory(trfArgClasses.argSubstepFloat, min=0.0, max=1.0, runarg=False), + help='Allowed "efficiency" for processing events - used to ensure output file has enough events (default 1.0)') + parser.add_argument('--athenaMPMergeTargetSize', '--mts', type=trfArgClasses.argFactory(trfArgClasses.argKeyFloatValueList, runarg=False), + metavar='dataType:targetSizeInMegaBytes', nargs='+', + help='Set the target merge size for an AthenaMP output file type (give size in MB). ' + 'Note that the special value 0 means do not merge this output file; negative values mean ' + 'always merge to a single file. 
Note that the datatype "ALL" will be used as a default ' + 'for all datatypes not explicitly given their own value.') + + +## @brief Options related to the setup of the ATLAS detector (used in simulation and digitisation +# as well as reconstruction) +# @param parser trfArgParser object +def addDetectorArguments(parser): + parser.defineArgGroup('Detector', 'General detector configuration options, for simulation and reconstruction') + parser.add_argument('--DBRelease', group = 'Detector', type=argFactory(trfArgClasses.argSubstep, runarg=False), metavar='substep:DBRelease', nargs='+', + help='Use DBRelease instead of ORACLE. Give either a DBRelease tarball file (e.g., DBRelease-21.7.1.tar.gz) or cvmfs DBRelease directory (e.g., 21.7.1 or current)') + parser.add_argument('--conditionsTag', group='Detector', type=argFactory(trfArgClasses.argSubstep), metavar='substep:CondTag', nargs='+', + help='Conditions tag to set') + parser.add_argument('--geometryVersion', group='Detector', type=argFactory(trfArgClasses.argSubstep), metavar='substep:GeoVersion', nargs='+', + help='ATLAS geometry version tag') + parser.add_argument('--beamType', group='Detector', type=argFactory(trfArgClasses.argString), + help='Manual beam type setting') + parser.add_argument('--runNumber', '--RunNumber', group='Detector', type=argFactory(trfArgClasses.argInt), + help='Manual run number setting')
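Taken together, these helpers let a transform assemble its command line interface from reusable blocks. A hypothetical skeleton of how a transform script might compose them (argument values purely illustrative):

```python
# Sketch of a transform's argument setup, assuming the helpers above
from PyJobTransforms.trfArgClasses import trfArgParser

parser = trfArgParser(description='Toy reconstruction transform')
addStandardTrfArgs(parser)    # --verbose, --argdict, report options, ...
addAthenaArguments(parser)    # --preExec, --maxEvents, --asetup, ...
addDetectorArguments(parser)  # --conditionsTag, --geometryVersion, ...
args = parser.parse_args(['--maxEvents', 'first:10', '--beamType', 'collisions'])
```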
## @brief Add primary DPD arguments +# @details Get the list of current primary DPDs and add them to the parser; +# optionally only some DPDs may be added, using the @c pick list. This function +# uses the @c silent decorator to suppress useless messages from ROOT +# @param parser Argument parser object to add arguments to +# @param pick Optional list of DPD types to add (use short names, e.g., @c DESDM_MUON) +# @param transform Transform object. DPD data types will be added to the correct executor (by name or substep) +# @param multipleOK If the @c multipleOK flag should be set for this argument +#@silent +def addPrimaryDPDArguments(parser, pick = None, transform = None, multipleOK=False): + parser.defineArgGroup('Primary DPDs', 'Primary DPD File Options') + # list* really gives just a list of DPD names + try: + from PrimaryDPDMaker.PrimaryDPDFlags import listRAWtoDPD,listESDtoDPD,listAODtoDPD + for substep, dpdList in [(['r2e'], listRAWtoDPD), (['e2d'], listESDtoDPD), (['a2d'], listAODtoDPD)]: + for dpdName in [ dpd.replace('Stream', '') for dpd in dpdList ]: + msg.debug('Handling {0}'.format(dpdName)) + if pick == None or dpdName in pick: + # Need to decide which file type we actually have here + dpdType = dpdName.split('_')[0] + if 'RAW' in dpdType: + parser.add_argument('--output' + dpdName + 'File', + type=argFactory(trfArgClasses.argBSFile, multipleOK=multipleOK, type='RAW'), + group = 'Primary DPDs', metavar=dpdName.upper(), + help='DPD {0} output {1} file'.format(dpdType, dpdName)) + elif 'AOD' in dpdType: + parser.add_argument('--output' + dpdName + 'File', + type=argFactory(trfArgClasses.argPOOLFile, multipleOK=multipleOK, type='AOD'), + group = 'Primary DPDs', metavar=dpdName.upper(), + help='DPD {0} output {1} file'.format(dpdType, dpdName)) + elif 'ESD' in dpdType: + parser.add_argument('--output' + dpdName + 'File', + type=argFactory(trfArgClasses.argPOOLFile, multipleOK=multipleOK, type='ESD'), + group = 'Primary DPDs', metavar=dpdName.upper(), + help='DPD {0} output {1} file'.format(dpdType, dpdName)) + else: + msg.warning('Unrecognised primary DPD type: {0}'.format(dpdName)) + # Automatically add DPD as output data arguments of their relevant executors + if transform: + for executor in transform.executors: + if hasattr(executor, 'substep') and executor.substep in substep: + executor.outDataUpdate([dpdName]) + if executor.name in substep: + executor.outDataUpdate([dpdName]) + + except ImportError: + msg.warning('PrimaryDPDFlags not available - cannot add primary DPD arguments')
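The "which file class?" decision above keys off the leading fragment of the DPD name. Isolated, the heuristic looks like this (a sketch; the class names are the real argFile subclasses from this package, the function itself is hypothetical):

```python
def guessDPDArgClass(dpdName):
    # The leading fragment of the name ('DRAW', 'DESDM', 'DAOD', ...)
    # decides whether this is a bytestream or a POOL file argument
    dpdType = dpdName.split('_')[0]
    if 'RAW' in dpdType:
        return trfArgClasses.argBSFile
    if 'ESD' in dpdType or 'AOD' in dpdType:
        return trfArgClasses.argPOOLFile
    return None  # unrecognised - the real code logs a warning

# guessDPDArgClass('DRAW_ZEE')   -> argBSFile
# guessDPDArgClass('DESDM_MUON') -> argPOOLFile
```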
## @brief Add top DAOD arguments +# @details Get the list of current top DAODs and add them to the parser; +# optionally only some DPDs may be added, using the @c pick list. +# @param @c parser Argument parser object to add arguments to +# @param @c pick Optional list of DPD types to add (use short names, e.g., @c D2AODM_TOPJET) +def addTopPhysDAODArguments(parser, pick = None): + parser.defineArgGroup('Top DAODs', 'Top Physics DAOD File Options') + # TopPhysAllDAODs is a list of JobProperty type objects + try: + from TopPhysD2PDMaker.TopPhysD2PDFlags import TopPhysAllDAODs + for dpdWriter in TopPhysAllDAODs: + dpdName = dpdWriter.StreamName.replace('Stream', '') + if pick == None or dpdName in pick: + parser.add_argument('--output' + dpdName + 'File', + type=argFactory(trfArgClasses.argFile, substep=['a2d']), group='Top DAODs', + metavar=dpdName.upper(), help='Top DAOD output %s file (substep [a2d])' % (dpdName,)) + except ImportError: + msg.warning('TopPhysD2PDFlags not available - cannot add D2PD arguments') + + +## @brief Add D3PD arguments +# @details Get the list of current D3PDs and add them to the parser; +# optionally only some DPDs may be added, using the @c pick list. +# @param parser Argument parser object to add arguments to +# @param pick Optional list of DPD types to add (use short names, e.g., @c NTUP_EGAMMA) +# @param transform Transform object. DPD data types will be added to the correct executor (by name or substep) +# @param multipleOK If the @c multipleOK flag should be set for this argument +# @param addD3PDMRGtypes Instead of normal output types for D3PDs, add @em input NTUPs and +# @em output merge NTUPs +def addD3PDArguments(parser, pick = None, transform = None, multipleOK=False, addD3PDMRGtypes = False): + parser.defineArgGroup('D3PD NTUPs', 'D3PD File Options') + # listAllKnownD3PD is a list of D3PD JobProperty type objects + try: + from D3PDMakerConfig.D3PDProdFlags import listAllKnownD3PD + for dpdWriter in listAllKnownD3PD: + dpdName = dpdWriter.StreamName.replace('Stream', '') + + if pick == None or dpdName in pick: + if addD3PDMRGtypes: + parser.add_argument('--input' + dpdName + 'File', + type=argFactory(trfArgClasses.argNTUPFile, treeNames=dpdWriter.TreeNames, io='input'), + group='D3PD NTUPs', + metavar=dpdName.upper(), help='D3PD input {0} file'.format(dpdName), nargs='+') + parser.add_argument('--output' + dpdName + '_MRGFile', + type=argFactory(trfArgClasses.argNTUPFile, treeNames=dpdWriter.TreeNames), + group='D3PD NTUPs', + metavar=dpdName.upper(), help='D3PD merged output {0} file'.format(dpdName)) + if transform: + for executor in transform.executors: + if executor.name == "NTUPLEMerge": + executor.inDataUpdate([dpdName]) + executor.outDataUpdate([dpdName+"_MRG"]) + else: + parser.add_argument('--output' + dpdName + 'File', + type=argFactory(trfArgClasses.argNTUPFile, treeNames=dpdWriter.TreeNames, multipleOK=multipleOK), + group='D3PD NTUPs', metavar=dpdName.upper(), + help='D3PD output {0} file (can be made in substeps {1})'.format(dpdName, ','.join(dpdWriter.SubSteps))) + # Automatically add D3PDs as data arguments of their relevant executors + if transform: + for executor in transform.executors: + if hasattr(executor, 'substep') and executor.substep in dpdWriter.SubSteps: + executor.outDataUpdate([dpdName]) + if executor.name in dpdWriter.SubSteps: + executor.outDataUpdate([dpdName]) + + except ImportError: + msg.warning('D3PDProdFlags not available - cannot add D3PD arguments') + + +## @brief Add reduction framework output file argument +# @details This is the base name for the NTUP output streams; the names of the +# specific reduced types are appended to it. +# @param parser Argument parser object to add arguments to +# @param transform Transform object, to add reduced object to (maybe don't need that here?) +def addReductionArguments(parser, transform = None): + parser.defineArgGroup('Reduction', 'Reduced Filetype Options') + parser.add_argument('--inputNTUP_COMMONFile', + type=argFactory(trfArgClasses.argNTUPFile, io='input', treeNames=['physics']), + group='Reduction', help='NTUP common input file', nargs='+') + parser.add_argument('--outputDNTUPFile', + type=argFactory(trfArgClasses.argNTUPFile, treeNames=None), + group='Reduction', help='Reduced NTUP output filename base') + parser.add_argument('--outputDAODFile', + type=argFactory(trfArgClasses.argPOOLFile, io='output', type='aod'), + help='Output reduced AOD filename base', group='Reduction') + parser.add_argument('--reductionConf', + type=argFactory(trfArgClasses.argList), nargs='+', + help='Configuration of reduced stream outputs', group='Reduction') + parser.add_argument('--passThrough', type=argFactory(trfArgClasses.argBool), metavar="True/False", + help='Run the derivation framework in a pass-through mode, needed for some MC samples. 
Needs to be implemented in derivation JOs') + + +## @brief Simple class to store information about extra DPD filetypes +# @details Implemented as a class, allowing for extensions to be easily added +# and for some convenient heuristics in the constructor +class dpdType(object): + + ## @brief Class constructor for dpdType + # @param name The name of this datatype (e.g., @c DRAW_ZEE, @c NTUP_SCT) + # @param type The argFile.type (should be the major datatype, e.g. @c bs, @c esd, @c aod, etc.) + # @param substeps The substeps or executor names where this data can be made + # @param argclass The argument class to be used for this data + # @param treeNames For DPD types only, the tree(s) used for event counting (if @c None then + # no event counting can be done) + def __init__(self, name, type = None, substeps = [], argclass = None, treeNames = None): + self._name = name + + ## @note Not very clear how useful this actually is, but we + # implement some heuristics here to avoid having to set + # the argFile.type always by hand + if type is None: + if 'RAW' in name: + self._type = 'bs' + elif 'ESD' in name: + self._type = 'esd' + elif 'AOD' in name: + self._type = 'aod' + elif 'NTUP' in name: + self._type = 'ntup' + else: + self._type = type + + ## @note If not given explicitly apply some heuristics, watch out for this + # if your data is made in a non-standard step + if substeps == []: + if 'RAW' in name: + self._substeps = ['RAWtoESD'] + elif 'ESD' in name: + self._substeps = ['RAWtoESD'] + elif 'AOD' in name: + self._substeps = ['ESDtoAOD'] + elif 'NTUP' in name: + self._substeps = ['ESDtoDPD', 'AODtoDPD'] + else: + self._substeps = substeps + + ## @note Similarly if no explicit class is given we guess, YMMV + if argclass is None: + if 'RAW' in name: + self._argclass = trfArgClasses.argBSFile + elif 'ESD' in name: + self._argclass = trfArgClasses.argPOOLFile + elif 'AOD' in name: + self._argclass = trfArgClasses.argPOOLFile + elif 'NTUP' in name: + self._argclass = trfArgClasses.argNTUPFile + else: + self._argclass = argclass + + self._treeNames = treeNames + + @property + def name(self): + return self._name + + @property + def type(self): + return self._type + + @property + def substeps(self): + return self._substeps + + @property + def argclass(self): + return self._argclass + + @property + def treeNames(self): + return self._treeNames + + +## @brief Add additional DPD arguments +# @details Manually add DPDs that, for some reason, are not in any of the automated +# lists parsed by the companion functions above. +# @param parser Argument parser object to add arguments to +# @param pick Optional list of DPD types to add (use short names, e.g., @c NTUP_EGAMMA) +# @param transform Transform object. DPD data types will be added to the correct executor (by name or substep) +# @param multipleOK If the @c multipleOK flag should be set for this argument +def addExtraDPDTypes(parser, pick=None, transform=None, multipleOK=False): + parser.defineArgGroup('Additional DPDs', 'Extra DPD file types') + + # Note on deprecated arguments c.f. 
Reco_trf: + # TAG_COMM - doesn't produce any output so seems to be deprecated + # {DESD,DAOD}_{ZEE,ZMUMU,WENU} are in fact just aliases for ESD or AOD + + # Build up a simple list of extra DPD objects + extraDPDs = [] + extraDPDs.append(dpdType('NTUP_SCT', substeps=['r2e'])) + extraDPDs.append(dpdType('NTUP_MUFAST', substeps=['r2e'])) + extraDPDs.append(dpdType('NTUP_MUONCALIB', substeps=['r2e'], treeNames=['PatternNtupleMaker/Segments'])) + extraDPDs.append(dpdType('NTUP_TRKVALID', substeps=['r2e'])) + extraDPDs.append(dpdType('NTUP_FASTMON', substeps=['a2t','a2d','e2a'])) + extraDPDs.append(dpdType('NTUP_LARNOISE', substeps=['e2d'], treeNames=['CollectionTree'])) + extraDPDs.append(dpdType('NTUP_WZ', substeps=['e2d'], treeNames=['physics'])) + extraDPDs.append(dpdType('NTUP_TRT', substeps=['e2d'], treeNames=['MyCollectionTree'])) + extraDPDs.append(dpdType('NTUP_MCP', substeps=['e2d'], treeNames=['CollectionTree'])) + extraDPDs.append(dpdType('NTUP_HECNOISE', substeps=['e2d'], treeNames=['HECNoise'])) + extraDPDs.append(dpdType('NTUP_ENHBIAS', substeps=['e2d','e2a'], treeNames=['vertices'])) + extraDPDs.append(dpdType('NTUP_TRUTH', substeps=['a2d'], treeNames=['truth'])) + extraDPDs.append(dpdType('NTUP_SUSYTRUTH', substeps=['a2d'], treeNames=['truth'])) + extraDPDs.append(dpdType('NTUP_HIGHMULT', substeps=['e2a'], treeNames=['MinBiasTree'])) + extraDPDs.append(dpdType('NTUP_PROMPTPHOT', substeps=['e2d', 'a2d'], treeNames=["PAUReco","HggUserData"])) + + extraDPDs.append(dpdType('DESDM_BEAMSPOT')) + extraDPDs.append(dpdType('DAOD_HSG2')) + + for dpd in extraDPDs: + if pick == None or dpd.name in pick: + msg.debug('Adding DPD {0} ({1}, {2}, {3}, {4})'.format(dpd.name, dpd.type, dpd.substeps, dpd.treeNames, dpd.argclass)) + # NTUPs are a bit special as they can take a treeName to count events + if issubclass(dpd.argclass, trfArgClasses.argNTUPFile): + parser.add_argument('--output' + dpd.name + 'File', + type=argFactory(dpd.argclass, multipleOK=multipleOK, type=dpd.type, treeNames=dpd.treeNames), + group = 'Additional DPDs', metavar=dpd.name.upper(), + help='DPD output {0} file'.format(dpd.name)) + else: + parser.add_argument('--output' + dpd.name + 'File', + type=argFactory(dpd.argclass, multipleOK=multipleOK, type=dpd.type), + group = 'Additional DPDs', metavar=dpd.name.upper(), + help='DPD output {0} file'.format(dpd.name)) + if transform: + for executor in transform.executors: + if hasattr(executor, 'substep') and executor.substep in dpd.substeps: + executor.outDataUpdate([dpd.name]) + if executor.name in dpd.substeps: + executor.outDataUpdate([dpd.name]) + + +def addFileValidationArguments(parser): + parser.defineArgGroup('File Validation', 'Standard file validation switches') + parser.add_argument('--skipFileValidation', '--omitFileValidation', action='store_true', + group='File Validation', help='Skip both input and output file validation (warning - do not use this option in production jobs!)') + parser.add_argument('--skipInputFileValidation', '--omitInputFileValidation', action='store_true', + group='File Validation', help='Skip input file validation (warning - do not use this option in production jobs!)') + parser.add_argument('--skipOutputFileValidation', '--omitOutputFileValidation', action='store_true', + group='File Validation', help='Skip output file validation (warning - do not use this option in production jobs!)') + parser.add_argument('--parallelFileValidation', type = argFactory(trfArgClasses.argBool), + group='File Validation', help='Parallelise file validation') + +def 
addParallelJobProcessorArguments(parser): + parser.defineArgGroup('Parallel Job Processor', 'Parallel Job Processor arguments') + parser.add_argument('--parallelProcessPool', group='Parallel Job Processor', type=argFactory(trfArgClasses.argInt, runarg=False), help='Number of processes in pool requested (int)') + +def addValidationArguments(parser): + parser.defineArgGroup('Validation', 'Standard job validation switches') + parser.add_argument('--ignoreFiles', '--ignoreFilters', group='Validation', type=argFactory(trfArgClasses.argList, splitter=',', runarg=False), + help='Files containing error patterns to be ignored during logfile scans (will split on commas; use "None" to disable the standard "atlas_error_mask.db")', nargs='+') + parser.add_argument('--ignorePatterns', group='Validation', type=argFactory(trfArgClasses.argList, splitter=',', runarg=False), + help='Regexp error patterns to be ignored during logfile scans (will be applied as a search against the whole logfile line)', nargs='+') + parser.add_argument('--ignoreErrors', type=argFactory(trfArgClasses.argBool, runarg=False), metavar="BOOL", group='Validation', + help='Ignore ERROR lines in logfiles (use with care as this can mask serious problems; --ignorePatterns is preferred)') + parser.add_argument('--checkEventCount', type=trfArgClasses.argFactory(trfArgClasses.argSubstepBool, defaultSubstep = 'all', runarg=False), + help='Enable check of output events against input events (default: True)', group='Validation', + metavar="BOOL") + +## @brief Add trigger related arguments +def addTriggerArguments(parser, addTrigFilter=True): + parser.defineArgGroup('Trigger', 'Trigger Related Options') + parser.add_argument('--triggerConfig', + type=argFactory(trfArgClasses.argSubstep, defaultSubstep="RAWtoESD", separator='='), + metavar='substep=triggerConf', + help='Trigger configuration string (substep aware argument - default is to run trigger in RAWtoESD step, ' + 'use syntax SUBSTEP=TRIGCONF if you want to run trigger somewhere else). ' + 'N.B. This argument uses EQUALS (=) to separate the substep name from the value.', + group='Trigger') + if addTrigFilter: + parser.add_argument('--trigFilterList', + type=argFactory(trfArgClasses.argList), nargs="+", + help='Trigger filter list (multiple values can be given separately or split on commas; only understood in RAWtoESD)', + group='Trigger')
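The '=' separator for --triggerConfig exists because trigger configuration values themselves contain colons, so the usual ':' substep separator would split in the wrong place. A sketch of the difference (trigger configuration string purely illustrative):

```python
import re

def parseWithSeparator(item, separator='=', default='all'):
    # argSubstep with separator '=' keeps ':' free for use inside the value
    m = re.match(r'([a-zA-Z0-9]+)' + separator + r'(.*)', item)
    return (m.group(1), m.group(2)) if m else (default, item)

print(parseWithSeparator('RAWtoESD=MCRECO:DB:TRIGGERDBMC:100'))
# -> ('RAWtoESD', 'MCRECO:DB:TRIGGERDBMC:100')
```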
## Tea for two and two for tea... these arguments are used for testing +def addTeaArguments(parser): + parser.defineArgGroup('Tea', 'Tea Making Options (for testing)') + parser.add_argument('--cupsOfTea', group='Tea', type=argFactory(trfArgClasses.argInt), help='Number of cups of tea requested (int)') + parser.add_argument('--teaType', group='Tea', type=argFactory(trfArgClasses.argString), help='Requested flavour of tea (string)') + parser.add_argument('--mugVolume', group='Tea', type=argFactory(trfArgClasses.argFloat), help='How large a cup to use (float)') + parser.add_argument('--drinkers', group='Tea', nargs='+', type=argFactory(trfArgClasses.argList), help='Who is drinking tea (list)') + +## @brief This method adds the current valid list of D3PDs to two lists +def listKnownD3PDs(): + inputD3PDList = [] + outputD3PDList = [] + from D3PDMakerConfig.D3PDProdFlags import listAllKnownD3PD + for dpdWriter in listAllKnownD3PD: + dpdName = dpdWriter.StreamName.replace('Stream', '') + inputD3PDList.append(dpdName) + outputD3PDList.append(dpdName+'_MRG') + return inputD3PDList, outputD3PDList + diff --git a/Tools/PyJobTransforms/python/trfDecorators.py b/Tools/PyJobTransforms/python/trfDecorators.py new file mode 100644 index 0000000000000000000000000000000000000000..471f4826b03092f579f5c79c31452c0e18db2f98 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfDecorators.py @@ -0,0 +1,234 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfDecorators +# @brief Some useful decorators used by the transforms +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfDecorators.py 590263 2014-03-31 13:01:37Z graemes $ + +import functools +import os +import Queue +import sys +import time +import unittest + +import PyJobTransforms.trfUtils as trfUtils +from PyJobTransforms.trfExitCodes import trfExit + +from PyJobTransforms.trfLogger import logging + + +## @brief Redirect stdout/err to /dev/null +# Useful wrapper to get rid of ROOT verbosity... +# N.B. May be somewhat dangerous in its present form - all errors disappear +# even ones you might want to see :-) +def silent(func): + def silent_running(*args, **kwargs): + # Create some filehandles to save the stdout/err fds to + save_err = open('/dev/null', 'w') + save_out = open('/dev/null', 'w') + os.dup2(sys.stderr.fileno(), save_err.fileno()) + os.dup2(sys.stdout.fileno(), save_out.fileno()) + + # Now open 'quiet' file handles and attach stdout/err + quiet_err = open('/dev/null', 'w') + quiet_out = open('/dev/null', 'w') + os.dup2(quiet_err.fileno(), sys.stderr.fileno()) + os.dup2(quiet_out.fileno(), sys.stdout.fileno()) + + # Execute function + rc = func(*args, **kwargs) + + # Restore fds + os.dup2(save_err.fileno(), sys.stderr.fileno()) + os.dup2(save_out.fileno(), sys.stdout.fileno()) + + return rc + # Make the wrapper look like the wrapped function + functools.update_wrapper(silent_running, func) + return silent_running + + +## @brief Decorator to wrap a transform in an outer try: ... except: ... 
+## @brief Decorator to wrap a transform in outer try: ... except: ...
+def stdTrfExceptionHandler(func):
+    def exception_wrapper(*args, **kwargs):
+        # Setup imports which the wrapper needs
+        import signal
+        import traceback
+        import logging
+        msg = logging.getLogger(__name__)
+
+        import PyJobTransforms.trfExceptions as trfExceptions
+        from PyJobTransforms.trfExitCodes import trfExit
+
+        try:
+            return func(*args, **kwargs)
+
+        except KeyboardInterrupt:
+            msg.critical('Caught a keyboard interrupt - exiting at your request.')
+            trfUtils.infanticide(message=True)
+            sys.exit(128 + signal.SIGINT)
+
+        # This subclass is treated as a 'normal' exit condition,
+        # but it should never happen in production as it's a transform definition error
+        except trfExceptions.TransformSetupException, e:
+            msg.critical('Transform setup failed: {0}'.format(e.errMsg))
+            msg.critical('To help you debug here is the stack trace:')
+            msg.critical(traceback.format_exc(None))
+            msg.critical('(Early exit - no job report is produced)')
+            trfUtils.infanticide(message=True)
+            sys.exit(e.errCode)
+
+        except trfExceptions.TransformException, e:
+            msg.critical('Got a transform exception in the outer exception handler: {0!s}'.format(e))
+            msg.critical('Stack trace is...')
+            msg.critical(traceback.format_exc(None))
+            msg.critical('Job reports are likely to be missing or incomplete - sorry')
+            msg.critical('Please report this as a transforms bug!')
+            trfUtils.infanticide(message=True)
+            sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_TRF_EXCEPTION'))
+
+        except Exception, e:
+            msg.critical('Got a general exception in the outer exception handler: {0!s}'.format(e))
+            msg.critical('Stack trace is...')
+            msg.critical(traceback.format_exc(None))
+            msg.critical('Job reports are likely to be missing or incomplete - sorry')
+            msg.critical('Please report this as a transforms bug!')
+            trfUtils.infanticide(message=True)
+            sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_OTHER_EXCEPTION'))
+
+    functools.update_wrapper(exception_wrapper, func)
+    return exception_wrapper
+
+
+## @brief Decorator to dump a stack trace when hit by SIGUSR1
+# Note that this decorator has to go inside the stdTrfExceptionHandler,
+# otherwise the general exception catcher catches the SigUsr1 exception.
+def sigUsrStackTrace(func):
+    import os
+    import signal
+    import traceback
+    import logging
+    msg = logging.getLogger(__name__)
+
+    class SigUsr1(Exception):
+        pass
+
+    def sigHandler(signum, frame):
+        msg.info('Handling signal %d in sigHandler' % signum)
+        raise SigUsr1
+
+    def signal_wrapper(*args, **kwargs):
+        signal.signal(signal.SIGUSR1, sigHandler)
+
+        try:
+            return func(*args, **kwargs)
+
+        except SigUsr1:
+            msg.error('Transform received SIGUSR1. Exiting now with stack trace...')
+            msg.error('(The important frame is usually the one before this trfDecorators module.)')
+            msg.error(traceback.format_exc(None))
+            trfUtils.infanticide(message=True)
+            sys.exit(128 + signal.SIGUSR1)
+
+    functools.update_wrapper(signal_wrapper, func)
+    return signal_wrapper
+
+
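# (Editorial illustration, not part of the original patch) To obtain a stack
# trace from a running transform wrapped in @sigUsrStackTrace, send it SIGUSR1
# from another process ('transformPid' is hypothetical):
#
#   import os
#   import signal
#   os.kill(transformPid, signal.SIGUSR1)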
+def timelimited(timeout=None, retry=1, timefactor=1.5, sleeptime=10, defaultrc=None):
+
+    import traceback
+    import Queue
+    import multiprocessing as mp
+
+    from sys import exc_info
+    from PyJobTransforms.trfExceptions import TransformTimeoutException
+
+    msg = logging.getLogger(__name__)
+
+    def internal(func):
+
+        ## @brief Run our wrapped function on the multiprocess queue
+        # @details Run wrapper function and use the message queue to communicate status and result
+        # @return None. However, on the message queue add a tuple with two members:
+        #  - @c key, which is True if function exited properly, False if an exception occurred
+        #  - @c result, which is the output of the function or a tuple of exception information
+        def funcWithQueue(queue, *args, **kwargs):
+            try:
+                result = func(*args, **kwargs)
+                queue.put((True, result))
+            except:
+                exc0=exc_info()[0]
+                exc1=exc_info()[1]
+                exc2=traceback.format_exc()
+                msg.warning('In time limited function %s an exception occurred' % (func.func_name))
+                msg.warning('Original traceback:')
+                msg.warning(exc2)
+                queue.put((False,(exc0, exc1, exc2)))
+
+        def funcWithTimeout(*args, **kwargs):
+            ltimeout=timeout
+            lretry=retry
+            ltimefactor=timefactor
+            lsleeptime=sleeptime
+            ldefaultrc=defaultrc
+
+            if 'timeout' in kwargs:
+                ltimeout=kwargs.pop('timeout')
+            if 'retry' in kwargs:
+                lretry=kwargs.pop('retry')
+            if 'timefactor' in kwargs:
+                ltimefactor=kwargs.pop('timefactor')
+            if 'sleeptime' in kwargs:
+                lsleeptime=kwargs.pop('sleeptime')
+            if 'defaultrc' in kwargs:
+                ldefaultrc=kwargs.pop('defaultrc')
+
+            if ltimeout is None:
+                # Run function normally with no timeout wrapper
+                msg.debug('Running {0}: {1} {2} without timeout'.format(func, args, kwargs))
+                return func(*args, **kwargs)
+
+            n=0
+            while n<=lretry:
+                msg.info('Try %i out of %i (time limit %s s) to call %s.' % (n+1, lretry+1, ltimeout, func.func_name))
+                starttime = time.time()
+                q=mp.Queue(maxsize=1)
+                nargs = (q,) + args
+                proc=mp.Process(target=funcWithQueue, args=nargs, kwargs=kwargs)
+                proc.start()
+                try:
+                    # Wait for function to run and return, but with a timeout
+                    flag,result = q.get(block=True, timeout=ltimeout)
+                    proc.join(60)
+                    msg.info('Executed call within %d s.' % (time.time()-starttime))
+                    if flag:
+                        return result
+                    else:
+                        msg.warning('But an exception occurred in function %s.' % (func.func_name))
+                        msg.warning('Returning default return code %s.' % ldefaultrc)
+                        return ldefaultrc
+                except Queue.Empty:
+                    # Our function did not finish in time - kill it, then increase the timeout for any retry
+                    msg.warning('Timeout limit of %d s reached. Kill subprocess and its children.' % ltimeout)
+                    parent=proc.pid
+                    pids=[parent]
+                    pids.extend(trfUtils.listChildren(parent=parent, listOrphans = False))
+                    trfUtils.infanticide(pids)
+                    proc.join(60) # Ensure cleanup
+                    if n!=lretry:
+                        msg.info('Going to sleep for %d s.' % lsleeptime)
+                        time.sleep(lsleeptime)
+                    n+=1
+                    ltimeout*=ltimefactor
+                    lsleeptime*=ltimefactor
+
+            msg.warning('All %i tries failed!'
% n) + raise TransformTimeoutException(trfExit.nameToCode('TRF_EXEC_TIMEOUT'), 'Timeout in function %s' % (func.func_name)) + + return funcWithTimeout + + functools.update_wrapper(funcWithTimeout, func) + + return internal diff --git a/Tools/PyJobTransforms/python/trfDefaultFiles.py b/Tools/PyJobTransforms/python/trfDefaultFiles.py new file mode 100644 index 0000000000000000000000000000000000000000..36183fd341ecee9afff18d644e15195fba943b40 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfDefaultFiles.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Creation: John Chapman (Cambridge), September 2010 +# Usage: +# -specify default inputs for transforms + +#Default values of input/output types, for standard tests +DefaultInputs={ + 'inputBSFile' : '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts', + 'inputRDOFile': '/afs/cern.ch/atlas/offline/test/mc11_valid.105200.T1_McAtNlo_Jimmy.digit.RDO.e835_s1310_s1300_d622.RDO.10evts.pool.root', + 'inputESDFile': '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts.ESD.pool.root', + 'inputAODFile': '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts.AOD.pool.root', + 'cosmicsBS' : '/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data', + 'topBS' : '/afs/cern.ch/atlas/offline/test/user.jboyd.DiLeptonicTopSkim.RAW.Run183347.10events.dat', + + 'inputEvgenFile': '/afs/cern.ch/atlas/offline/ProdData/15.6.11.3/mu_E50_eta0-25-7000.evgen.pool.root', + 'inputHITSFile' : 'root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.105200.T1_McAtNlo_Jimmy.simul.HITS.e598_s933_tid168076_00/HITS.168076._008421.pool.root.1', + 'NDMinbiasHitsFile': 'root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.105001.pythia_minbias.merge.HITS.e577_s932_s952_tid170554_00/HITS.170554._000034.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.105001.pythia_minbias.merge.HITS.e577_s932_s952_tid170554_00/HITS.170554._000043.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.105001.pythia_minbias.merge.HITS.e577_s932_s952_tid170554_00/HITS.170554._000060.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.105001.pythia_minbias.merge.HITS.e577_s932_s952_tid170554_00/HITS.170554._000082.pool.root.1', + #'SDMinbiasHitsFile': '/afs/cern.ch/atlas/offline/ProdData/15.6.11.3/mu_E50_eta0-25-7000_ATLAS-GEO-11-00-00.hits.pool.root', + #'DDMinbiasHitsFile': '/afs/cern.ch/atlas/offline/ProdData/15.6.11.3/mu_E50_eta0-25-7000_ATLAS-GEO-11-00-00.hits.pool.root', + 'cavernHitsFile': 
'root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.005008.CavernInput.merge.HITS.e4_e607_s951_s952_tid170551_00/HITS.170551._000011.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.005008.CavernInput.merge.HITS.e4_e607_s951_s952_tid170551_00/HITS.170551._000111.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.005008.CavernInput.merge.HITS.e4_e607_s951_s952_tid170551_00/HITS.170551._000144.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.005008.CavernInput.merge.HITS.e4_e607_s951_s952_tid170551_00/HITS.170551._000150.pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.005008.CavernInput.merge.HITS.e4_e607_s951_s952_tid170551_00/HITS.170551._000151.pool.root.1', + 'beamHaloHitsFile': 'root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.108852.BeamHaloInputs.merge.HITS.e4_e567_s949_s952_tid170552_00/HITS.170552._000001.pool.root.1', + 'beamGasHitsFile': 'root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc10/mc10_7TeV.108863.Hijing_beamgas.merge.HITS.e4_s950_s952_tid170553_00/HITS.170552._000087.pool.root.1' + } + + +def getOutputFileName(fmt): + value='my'+fmt + fmt=fmt.split('_')[0] + if fmt in ['RAW', 'DRAW']: + value += '.data' + elif fmt in ['RDO', 'EVNT', 'TAG', 'HITS', 'ESD', 'DESD', 'D2ESD', 'DESDM', 'D2ESDM', 'AOD', 'DAOD', 'D2AOD', 'DAODM', 'D2AODM']: + value += '.pool.root' + elif fmt in ['HIST', 'NTUP']: + value += '.root' + return value + + +def getInputFileName(arg, tag=None): + arg=arg.lstrip('-') + if tag=="q120" and arg=='inputBSFile': + return DefaultInputs["cosmicsBS"] + elif tag=="q126" and arg=='inputBSFile': + return DefaultInputs["topBS"] + else: + fmt = arg.replace('input','').replace('File','') + return DefaultInputs.get(arg, getOutputFileName(fmt) ) + + + + diff --git a/Tools/PyJobTransforms/python/trfEnv.py b/Tools/PyJobTransforms/python/trfEnv.py new file mode 100644 index 0000000000000000000000000000000000000000..2e7829963fe133917265873b59f87532a2e9cbd6 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfEnv.py @@ -0,0 +1,151 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package PyJobTransforms.trfEnv +# @brief Support for environemnt variable manipulation in the transforms +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfEnv.py 588222 2014-03-18 14:37:06Z graemes $ + +import os +import os.path +import re + +import logging +msg = logging.getLogger(__name__) + +import PyJobTransforms.trfUtils as trfUtils + +## @brief Class holding the update to an environment that will be passed on to +# an executor +class environmentUpdate(object): + + def __init__(self): + self._envdict = {} + + ## @brief Setup the standard execution environment according to the + # switches in the transform configuration + def setStandardEnvironment(self, argdict, name='all', substep=''): + self.probeIMFSettings(argdict, name=name, substep=substep) + self.probeTcmallocSettings(argdict, name=name, substep=substep) + self.probeOtherSettings(argdict, name=name, substep=substep) + + ## @brief Add the libimf maths library to the setup + def probeIMFSettings(self, argdict, name='all', substep=''): + # If imf=True/False then follow its lead, but otherwise try to detect the release + # and enable if we have a release >= 17.7 + if 'imf' in argdict: + if 
argdict['imf'].returnMyValue(name=name, substep=substep) == False:
+                msg.info('Skipping inclusion of imf libraries: --imf is set to False')
+            else:
+                msg.info('Enabling inclusion of imf libraries: --imf is set to True')
+                self._addIMFSettings()
+            return
+
+        # OK, try and detect the release
+        if trfUtils.releaseIsOlderThan(17, 7):
+            msg.info('No IMF by default for this release')
+            return
+
+        msg.info('Enabling IMF by default for release')
+        self._addIMFSettings()
+
+
+    def _addIMFSettings(self):
+        if 'ATLASMKLLIBDIR_PRELOAD' not in os.environ:
+            msg.warning('"ATLASMKLLIBDIR_PRELOAD" not found in the current environment'
+                        ' - no setup of MKL is possible')
+            return
+
+        if 'ATLASMKLLIBDIR_PRELOAD' in os.environ:
+            if "LD_PRELOAD" not in self._envdict:
+                self._envdict["LD_PRELOAD"] = pathVar("LD_PRELOAD")
+            self._envdict["LD_PRELOAD"].add(os.path.join(os.environ['ATLASMKLLIBDIR_PRELOAD'], "libimf.so"))
+            self._envdict["LD_PRELOAD"].add(os.path.join(os.environ['ATLASMKLLIBDIR_PRELOAD'], "libintlc.so.5"))
+
+
+    ## @brief Add TCMALLOC to the setup
+    def probeTcmallocSettings(self, argdict, name='all', substep=''):
+        # We only do this if tcmalloc=True in argdict
+        if 'tcmalloc' not in argdict or argdict['tcmalloc'].returnMyValue(name=name, substep=substep) is False:
+            msg.info('Skipping inclusion of tcmalloc')
+            return
+
+        if 'TCMALLOCDIR' not in os.environ:
+            msg.warning('"TCMALLOCDIR" not found in the current environment'
+                        ' - no setup of tcmalloc is possible')
+            return
+
+        # For now we support the minimal version (it's the default)
+        if "LD_PRELOAD" not in self._envdict:
+            self._envdict["LD_PRELOAD"] = pathVar("LD_PRELOAD")
+        self._envdict["LD_PRELOAD"].add(os.path.join(os.environ['TCMALLOCDIR'], "libtcmalloc_minimal.so"))
+
+
+    ## @brief Add other settings
+    def probeOtherSettings(self, argdict, name='all', substep=''):
+        if 'env' not in argdict:
+            return
+
+        myEnv = argdict['env'].returnMyValue(name=name, substep=substep)
+        if myEnv is None:
+            return
+
+        for setting in myEnv:
+            try:
+                k, v = setting.split('=', 1)
+                self._envdict[k] = v
+            except ValueError:
+                msg.warning('Environment setting "{0}" seems to be invalid (must be KEY=VALUE)'.format(setting))
+
+    ## @brief Return the value for a key, string converted
+    def value(self, key):
+        return str(self._envdict[key])
+
+    ## @brief Return a list of KEY=VALUE pairs for this environment
+    @property
+    def values(self):
+        return [ "{0}={1}".format(k, v) for k, v in self._envdict.iteritems() ]
+
+    ## @brief Count the number of environment items that need to be updated
+    @property
+    def len(self):
+        return len(self._envdict)
+
+
+## @brief Helper class for environment variables using colon separated paths
+class pathVar(object):
+    _currentEnvironmentValue = "+++CURRENT+++"
+
+    def __init__(self, varname, separator=":", testForExistance=True):
+        self._name = varname
+        self._separator = separator
+        self._testExistance = testForExistance
+
+        # Note the special value @c _currentEnvironmentValue that will be expanded to the current
+        # setting in the environment (i.e., @c os.environ['self._name']), if it exists
+        self._value = [self._currentEnvironmentValue]
+
+    ## @brief Add a new element to the variable
+    def add(self, value, prepend=True):
+        msg.debug('Adding new value "{0}" to envvar {1} (currently {2})'.format(value, self._name, self._value))
+        if value in self._value:
+            msg.warning('Attempt to add environment element {0} twice to {1}'.format(value, self._name))
+        if self._testExistance:
+            if not os.access(value, os.R_OK):
+                msg.warning("Path to {0} is not readable
- will not add it to {1}".format(value, self._name)) + return + if prepend: + self._value[0:0] = [value] + else: + self._value.append(value) + + ## @brief Return the correct string representing the value for the shell + def __str__(self): + valStr = "" + for v in self._value: + if v == self._currentEnvironmentValue: + if self._name in os.environ: + valStr += "$" + self._name + self._separator + else: + valStr += v + self._separator + valStr = valStr[:-1] + return valStr diff --git a/Tools/PyJobTransforms/python/trfExceptions.py b/Tools/PyJobTransforms/python/trfExceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..edb5ab7c6a4075b0fbbbc1f8d79f0e330e286041 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfExceptions.py @@ -0,0 +1,91 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package PyJobTransforms.trfExceptions +# @brief Standard exceptions thrown by transforms +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfExceptions.py 570543 2013-11-14 21:52:16Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + + + +## Base class for transform exceptions +class TransformException(Exception): + def __init__(self, errCode, errMsg): + self._errCode = errCode + self._errMsg = errMsg + + def __str__(self): + return "%s (Error code %d)" % (self._errMsg, self._errCode) + + @property + def errCode(self): + return self._errCode + + @property + def errMsg(self): + return self._errMsg + + ## @brief Have a setter for @c errMsg as sometimes we want to update this with more + # information + @errMsg.setter + def errMsg(self, value): + self._errMsg = value + + +## Group of argument based exceptions +class TransformArgException(TransformException): + pass + +## Setup exceptions +class TransformSetupException(TransformException): + pass + +## Logfile exceptions +class TransformLogfileException(TransformException): + pass + +## Group of validation exceptions +class TransformValidationException(TransformException): + pass + +## Group of validation exceptions +class TransformReportException(TransformException): + pass + +## Exception class for validation failures detected by parsing logfiles +class TransformLogfileErrorException(TransformValidationException): + pass + +## Base class for execution exceptions +class TransformExecutionException(TransformException): + pass + +## Base class for file merging exceptions +class TransformMergeException(TransformExecutionException): + pass + +## Exception for problems finding the path through the graph +class TransformGraphException(TransformExecutionException): + pass + +## Transform internal errors +class TransformInternalException(TransformException): + pass + +## Exception used by time limited executions +class TransformTimeoutException(TransformException): + pass + +## Exception used by metadata functions +class TransformMetadataException(TransformException): + pass + +## Exception used by configuration via AMI tags +class TransformAMIException(TransformException): + pass + + diff --git a/Tools/PyJobTransforms/python/trfExe.py b/Tools/PyJobTransforms/python/trfExe.py new file mode 100755 index 0000000000000000000000000000000000000000..e163736cf4d51ab97dca2502c4cc6e34fab241aa --- /dev/null +++ b/Tools/PyJobTransforms/python/trfExe.py @@ -0,0 +1,1426 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfExe +# +# @brief Transform execution functions +# @details Standard transform executors +# @author 
atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfExe.py 615296 2014-09-05 15:18:54Z graemes $
+
+import copy
+import math
+import os
+import os.path
+import re
+import shutil
+import subprocess
+import sys
+import unittest
+
+from xml.etree import ElementTree
+
+import logging
+msg = logging.getLogger(__name__)
+
+from PyJobTransforms.trfJobOptions import JobOptionsTemplate
+from PyJobTransforms.trfUtils import findFile, asetupReport, unpackTarFile, unpackDBRelease, setupDBRelease, cvmfsDBReleaseCheck, forceToAlphaNum, releaseIsOlderThan
+from PyJobTransforms.trfExitCodes import trfExit
+from PyJobTransforms.trfLogger import stdLogLevels
+
+
+import PyJobTransforms.trfExceptions as trfExceptions
+import PyJobTransforms.trfValidation as trfValidation
+import PyJobTransforms.trfArgClasses as trfArgClasses
+import PyJobTransforms.trfEnv as trfEnv
+
+## @note This class contains the configuration information necessary to run an executor.
+# In most cases this is simply a collection of references to the parent transform, however,
+# abstraction is done via an instance of this class so that 'lightweight' executors can
+# be run for auxiliary purposes (e.g., file merging after AthenaMP was used, where the merging
+# is outside of the main workflow, but invoked in the main executor's "postExecute" method).
+class executorConfig(object):
+
+    ## @brief Configuration for an executor
+    # @param argdict Argument dictionary for this executor
+    # @param dataDictionary Mapping from input data names to argFile instances
+    # @param firstExecutor Boolean set to @c True if we are the first executor
+    # @param disableMP Ensure that AthenaMP is not used (i.e., also unset
+    # @c ATHENA_PROC_NUMBER before execution)
+    def __init__(self, argdict={}, dataDictionary={}, firstExecutor=False, disableMP=False):
+        self._argdict = argdict
+        self._dataDictionary = dataDictionary
+        self._firstExecutor = firstExecutor
+        self._disableMP = disableMP
+
+    @property
+    def argdict(self):
+        return self._argdict
+
+    @argdict.setter
+    def argdict(self, value):
+        self._argdict = value
+
+    @property
+    def dataDictionary(self):
+        return self._dataDictionary
+
+    @dataDictionary.setter
+    def dataDictionary(self, value):
+        self._dataDictionary = value
+
+    @property
+    def firstExecutor(self):
+        return self._firstExecutor
+
+    @firstExecutor.setter
+    def firstExecutor(self, value):
+        self._firstExecutor = value
+
+    @property
+    def disableMP(self):
+        return self._disableMP
+
+    @disableMP.setter
+    def disableMP(self, value):
+        self._disableMP = value
+
+    ## @brief Set configuration properties from the parent transform
+    # @note It's not possible to set firstExecutor here as the transform holds
+    # the name of the first executor, which we don't know... (should we?)
+    def setFromTransform(self, trf):
+        self._argdict = trf.argdict
+        self._dataDictionary = trf.dataDictionary
+
+    ## @brief Add a new object to the argdict
+    def addToArgdict(self, key, value):
+        self._argdict[key] = value
+
+    ## @brief Add a new object to the dataDictionary
+    def addToDataDictionary(self, key, value):
+        self._dataDictionary[key] = value
+
+
+## Executors always only ever execute a single step, as seen by the transform
+class transformExecutor(object):
+
+    ## @brief Base class initialiser for transform executors
+    # @param name Transform name
+    # @param trf Parent transform
+    # @param conf executorConfig object (if @c None then set from the @c trf directly)
+    # @param inData Data inputs this transform can start from.
This should be a list, tuple or set + # consisting of each input data type. If a tuple (or list) is passed as a set member then this is interpreted as + # meaning that all of the data members in that tuple are necessary as an input. + # @note Curiously, sets are not allowed to be members of sets (they are not hashable, so no sub-sets) + # @param outData List of outputs this transform can produce (list, tuple or set can be used) + def __init__(self, name = 'Dummy', trf = None, conf = None, inData = set(), outData = set()): + # Some information to produce helpful log messages + self._name = forceToAlphaNum(name) + + # Data this executor can start from and produce + # Note we transform NULL to inNULL and outNULL as a convenience + self._inData = set(inData) + self._outData = set(outData) + if 'NULL' in self._inData: + self._inData.remove('NULL') + self._inData.add('inNULL') + if 'NULL' in self._outData: + self._outData.remove('NULL') + self._outData.add('outNULL') + + # It's forbidden for an executor to consume and produce the same datatype + dataOverlap = self._inData & self._outData + if len(dataOverlap) > 0: + raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_ERROR'), + 'Executor definition error, executor {0} is not allowed to produce and consume the same datatypes. Duplicated input/output types {1}'.format(self._name, ' '.join(dataOverlap))) + + ## Executor configuration: + # @note that if conf and trf are @c None then we'll probably set the conf up later (this is allowed and + # expected to be done once the master transform has figured out what it's doing for this job) + if conf is not None: + self.conf = conf + else: + self.conf = executorConfig() + if trf is not None: + self.conf.setFromTransform(trf) + + # Execution status + self._hasExecuted = False + self._rc = -1 + self._errMsg = None + + # Validation status + self._hasValidated = False + self._isValidated = False + + # Extra metadata + # This dictionary holds extra metadata for this executor which will be + # provided in job reports + self._extraMetadata = {} + + ## @note Place holders for resource consumption. CPU and walltime are available for all executors + # but currently only athena is instrumented to fill in memory stats (and then only if PerfMonSD is + # enabled). + self._exeStart = self._exeStop = None + self._memStats = {} + + + ## Now define properties for these data members + @property + def name(self): + return self._name + + @property + def substep(self): + if '_substep' in dir(self): + return self._substep + return None + + @property + def trf(self): + if '_trf' in dir(self): + return self._trf + return None + + @trf.setter + def trf(self, value): + self._trf = value + + @property + def inData(self): + ## @note Might not be set in all executors... + if '_inData' in dir(self): + return self._inData + return None + + @inData.setter + def inData(self, value): + self._inData = set(value) + + def inDataUpdate(self, value): + ## @note Protect against _inData not yet being defined + if '_inData' in dir(self): + self._inData.update(value) + else: + ## @note Use normal setter + self.inData = value + + + @property + def outData(self): + ## @note Might not be set in all executors... 
+ if '_outData' in dir(self): + return self._outData + return None + + @outData.setter + def outData(self, value): + self._outData = set(value) + + def outDataUpdate(self, value): + ## @note Protect against _outData not yet being defined + if '_outData' in dir(self): + self._outData.update(value) + else: + ## @note Use normal setter + self.outData = value + + @property + ## @note This returns the @b actual input data with which this executor ran + # (c.f. @c inData which returns all the possible data types this executor could run with) + def input(self): + ## @note Might not be set in all executors... + if '_input' in dir(self): + return self._input + return None + + @property + ## @note This returns the @b actual output data with which this executor ran + # (c.f. @c outData which returns all the possible data types this executor could run with) + def output(self): + ## @note Might not be set in all executors... + if '_output' in dir(self): + return self._output + return None + + @property + def extraMetadata(self): + return self._extraMetadata + + @property + def hasExecuted(self): + return self._hasExecuted + + @property + def rc(self): + return self._rc + + @property + def errMsg(self): + return self._errMsg + + @property + def validation(self): + return self._validation + + @validation.setter + def validation(self, value): + self._validation = value + + @property + def hasValidated(self): + return self._hasValidated + + @property + def isValidated(self): + return self._isValidated + + ## @note At the moment only athenaExecutor sets this property, but that might be changed... + @property + def first(self): + if hasattr(self, '_first'): + return self._first + else: + return None + + @property + def exeStartTimes(self): + return self._exeStart + + @property + def exeStopTimes(self): + return self._exeStop + + @property + def cpuTime(self): + if self._exeStart and self._exeStop: + return int(reduce(lambda x1, x2: x1+x2, map(lambda x1, x2: x2-x1, self._exeStart[2:4], self._exeStop[2:4])) + 0.5) + else: + return None + + @property + def usrTime(self): + if self._exeStart and self._exeStop: + return int(self._exeStop[2] - self._exeStart[2] + 0.5) + else: + return None + + @property + def sysTime(self): + if self._exeStart and self._exeStop: + return int(self._exeStop[3] - self._exeStart[3] + 0.5) + else: + return None + + @property + def wallTime(self): + if self._exeStart and self._exeStop: + return int(self._exeStop[4] - self._exeStart[4] + 0.5) + else: + return None + + @property + def memStats(self): + return self._memStats + + + def preExecute(self, input = set(), output = set()): + msg.info('Preexecute for %s' % self._name) + + def execute(self): + self._exeStart = os.times() + msg.info('Starting execution of %s' % self._name) + self._hasExecuted = True + self._rc = 0 + self._errMsg = '' + msg.info('%s executor returns %d' % (self._name, self._rc)) + self._exeStop = os.times() + + def postExecute(self): + msg.info('Postexecute for %s' % self._name) + + def validate(self): + self._hasValidated = True + msg.info('Executor %s has no validation function - assuming all ok' % self._name) + self._isValidated = True + self._errMsg = '' + + ## Convenience function + def doAll(self, input=set(), output=set()): + self.preExecute(input, output) + self.execute() + self.postExecute() + self.validate() + + +class echoExecutor(transformExecutor): + def __init__(self, name = 'Echo', trf = None): + + # We are only changing the default name here + super(echoExecutor, self).__init__(name=name, trf=trf) + + 
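    # (Editorial illustration, not part of the original patch) The lifecycle
    # driven by doAll() in the base class is preExecute -> execute ->
    # postExecute -> validate; a quick smoke test of this executor might look
    # like this ('myTrf' is hypothetical):
    #
    #     exe = echoExecutor(name='Echo', trf=myTrf)
    #     exe.doAll()
    #     print exe.rc, exe.isValidated    # -> 0 True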
+ def execute(self): + self._exeStart = os.times() + msg.info('Starting execution of %s' % self._name) + msg.info('Transform argument dictionary now follows:') + for k, v in self.conf.argdict.iteritems(): + print "%s = %s" % (k, v) + self._hasExecuted = True + self._rc = 0 + self._errMsg = '' + msg.info('%s executor returns %d' % (self._name, self._rc)) + self._exeStop = os.times() + + +class scriptExecutor(transformExecutor): + def __init__(self, name = 'Script', trf = None, conf = None, inData = set(), outData = set(), exe = None, exeArgs = None): + # Name of the script we want to execute + self._exe = exe + + # With arguments (currently this means paste in the corresponding _argdict entry) + self._exeArgs = exeArgs + + super(scriptExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData) + + self._extraMetadata.update({'script' : exe}) + + # Decide if we echo script output to stdout + self._echoOutput = False + + # Can either be written by base class or child + self._cmd = None + + @property + def exe(self): + return self._exe + + @exe.setter + def exe(self, value): + self._exe = value + self._extraMetadata['script'] = value + + @property + def exeArgs(self): + return self._exeArgs + + @exeArgs.setter + def exeArgs(self, value): + self._exeArgs = value +# self._extraMetadata['scriptArgs'] = value + + def preExecute(self, input = set(), output = set()): + msg.debug('scriptExecutor: Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) + + self._input = input + self._output = output + + ## @note If an inherited class has set self._cmd leave it alone + if self._cmd is None: + self._buildStandardCommand() + msg.info('Will execute script as %s', self._cmd) + + # Define this here to have it for environment detection messages + self._logFileName = "log.{0}".format(self._name) + + ## @note Query the environment for echo configuration + # Let the manual envars always win over auto-detected settings + if 'TRF_ECHO' in os.environ: + msg.info('TRF_ECHO envvar is set - enabling command echoing to stdout') + self._echoOutput = True + elif 'TRF_NOECHO' in os.environ: + msg.info('TRF_NOECHO envvar is set - disabling command echoing to stdout') + self._echoOutput = False + # PS1 is for sh, bash; prompt is for tcsh and zsh + elif 'PS1' in os.environ or 'prompt' in os.environ: + msg.info('Interactive environment detected (shell prompt) - enabling command echoing to stdout') + self._echoOutput = True + elif os.isatty(sys.stdout.fileno()) or os.isatty(sys.stdin.fileno()): + msg.info('Interactive environment detected (stdio or stdout is a tty) - enabling command echoing to stdout') + self._echoOutput = True + elif 'TZHOME' in os.environ: + msg.info('Tier-0 environment detected - enabling command echoing to stdout') + self._echoOutput = True + if self._echoOutput == False: + msg.info('Batch/grid running - command outputs will not be echoed. 
Logs for {0} are in {1}'.format(self._name, self._logFileName)) + + # Now setup special loggers for logging execution messages to stdout and file + self._echologger = logging.getLogger(self._name) + self._echologger.setLevel(logging.INFO) + self._echologger.propagate = False + + self._exeLogFile = logging.FileHandler(self._logFileName, mode='w') + self._exeLogFile.setFormatter(logging.Formatter('%(asctime)s %(message)s', datefmt='%H:%M:%S')) + self._echologger.addHandler(self._exeLogFile) + + if self._echoOutput: + self._echostream = logging.StreamHandler(sys.stdout) + self._echostream.setFormatter(logging.Formatter('%(name)s %(asctime)s %(message)s', datefmt='%H:%M:%S')) + self._echologger.addHandler(self._echostream) + + def _buildStandardCommand(self): + if self._exe: + self._cmd = [self.exe, ] + else: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), + 'No executor set in {0}'.format(self.__class__.__name__)) + for arg in self.exeArgs: + if arg in self.conf.argdict: + # If we have a list then add each element to our list, else just str() the argument value + # Note if there are arguments which need more complex transformations then + # consider introducing a special toExeArg() method. + if isinstance(self.conf.argdict[arg].value, list): + self._cmd.extend([ str(v) for v in self.conf.argdict[arg].value]) + else: + self._cmd.append(str(self.conf.argdict[arg].value)) + + + def execute(self): + self._hasExecuted = True + msg.info('Starting execution of {0} ({1})'.format(self._name, self._cmd)) + + self._exeStart = os.times() + if ('execOnly' in self.conf.argdict and self.conf.argdict['execOnly'] == True): + msg.info('execOnly flag is set - execution will now switch, replacing the transform') + os.execvp(self._cmd[0], self._cmd) + + p = subprocess.Popen(self._cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + if line: + self._echologger.info(line.rstrip()) + + # Hoover up remaining buffered output lines + for line in p.stdout: + self._echologger.info(line.rstrip()) + + self._rc = p.returncode + msg.info('%s executor returns %d' % (self._name, self._rc)) + self._exeStop = os.times() + + + def postExecute(self): + if hasattr(self._exeLogFile, 'close'): + self._exeLogFile.close() + + + def validate(self): + self._hasValidated = True + + ## Check rc + if self._rc == 0: + msg.info('Executor %s validated successfully (return code %s)' % (self._name, self._rc)) + self._isValidated = True + self._errMsg = '' + else: + self._isValidated = False + self._errMsg = 'Non-zero return code from %s (%d)' % (self._name, self._rc) + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_FAIL'), self._errMsg) + + ## Check event counts (always do this by default) + # Do this here so that all script executors have this by default (covers most use cases with events) + if 'checkEventCount' in self.conf.argdict.keys() and self.conf.argdict['checkEventCount'].returnMyValue(exe=self) is False: + msg.info('Event counting for substep {0} is skipped'.format(self.name)) + else: + checkcount=trfValidation.eventMatch(self) + checkcount.decide() + msg.info('Event counting for substep {0} passed'.format(self.name)) + + + +class athenaExecutor(scriptExecutor): + _exitMessageLimit = 200 # Maximum error message length to report in the exitMsg + _defaultIgnorePatternFile = ['atlas_error_mask.db'] + + ## @brief Initialise athena executor + # @param name Executor name + # 
@param trf Parent transform
+    # @param skeletonFile athena skeleton job options file (optionally this can be a list of skeletons
+    # that will be given to athena.py in order); can be set to @c None to disable writing job options
+    # files at all
+    # @param exe Athena execution script
+    # @param exeArgs Transform argument names whose value is passed to athena
+    # @param substep The athena substep this executor represents (alias for the name)
+    # @param inputEventTest Boolean switching the skipEvents < inputEvents test
+    # @param perfMonFile Name of perfmon file for this substep (used to retrieve vmem/rss information)
+    # @param tryDropAndReload Boolean switch for the attempt to add '--drop-and-reload' to athena args
+    # @param extraRunargs Dictionary of extra runargs to write into the job options file, using repr
+    # @param runtimeRunargs Dictionary of extra runargs to write into the job options file, using str
+    # @param literalRunargs List of extra lines to write into the runargs file
+    # @param dataArgs List of datatypes that will always be given as part of this transform's runargs
+    # even if not actually processed by this substep (used, e.g., to set random seeds for some generators)
+    # @param checkEventCount Compare the correct number of events in the output file (either input file size or maxEvents)
+    # @param errorMaskFiles List of files to use for error masks in logfile scanning (@c None means not set for this
+    # executor, so use the transform or the standard setting)
+    # @param manualDataDictionary Instead of using the inData/outData parameters that bind the data types for this
+    # executor to the workflow graph, run the executor manually with these data parameters (useful for
+    # post-facto executors, e.g., for AthenaMP merging)
+    # @note The difference between @c extraRunargs, @c runtimeRunargs and @c literalRunargs is that: @c extraRunargs
+    # uses repr(), so the RHS is the same as the python object in the transform; @c runtimeRunargs uses str() so
+    # that a string can be interpreted at runtime; @c literalRunargs allows the direct insertion of arbitrary python
+    # snippets into the runArgs file.
+    def __init__(self, name = 'athena', trf = None, conf = None, skeletonFile = 'PyJobTransforms/skeleton.dummy.py', inData = set(),
+                 outData = set(), exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True,
+                 perfMonFile = None, tryDropAndReload = True, extraRunargs = {}, runtimeRunargs = {},
+                 literalRunargs = [], dataArgs = [], checkEventCount = False, errorMaskFiles = None,
+                 manualDataDictionary = None):
+
+        self._substep = forceToAlphaNum(substep)
+        self._athenaMP = None # As yet unknown; N.B. this flag is used for AthenaMP version 2+.
For AthenaMP-I it is set to False + self._inputEventTest = inputEventTest + self._perfMonFile = perfMonFile + self._tryDropAndReload = tryDropAndReload + self._extraRunargs = extraRunargs + self._runtimeRunargs = runtimeRunargs + self._literalRunargs = literalRunargs + self._dataArgs = dataArgs + self._errorMaskFiles = errorMaskFiles + + # SkeletonFile can be None (disable) or a string or a list of strings - normalise it here + if type(skeletonFile) is str: + self._skeleton = [skeletonFile] + else: + self._skeleton = skeletonFile + + super(athenaExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData, exe=exe, exeArgs=exeArgs) + + # Add athena specific metadata + self._extraMetadata.update({'substep': substep}) + + # Setup JO templates + if self._skeleton is not None: + self._jobOptionsTemplate = JobOptionsTemplate(exe = self, version = '$Id: trfExe.py 615296 2014-09-05 15:18:54Z graemes $') + else: + self._jobOptionsTemplate = None + + + + @property + def substep(self): + return self._substep + + def preExecute(self, input = set(), output = set()): + msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) + + # Try to detect AthenaMP mode + # The first flag indicates if the transform needs to handle the AthenaMP merging (i.e., AthenaMP v2) + # The first flag is set true in order to disable the --drop-and-reload option because AthenaMP v1 + # cannot handle it + self._athenaMP, self._athenaMPv1 = self._detectAthenaMP() + + # And if this is athenaMP, then set some options for workers and output file report + if self._athenaMP: + self._athenaMPWorkerTopDir = 'athenaMP-workers-{0}-{1}'.format(self._name, self._substep) + self._athenaMPFileReport = 'athenaMP-outputs-{0}-{1}'.format(self._name, self._substep) + # See if we have options for the target output file size + if 'athenaMPMergeTargetSize' in self.conf._argdict: + for dataType, targetSize in self.conf._argdict['athenaMPMergeTargetSize'].value.iteritems(): + if dataType in self.conf._dataDictionary: + self.conf._dataDictionary[dataType].mergeTargetSize = targetSize * 1000000 # Convert from MB to B + msg.info('Set target merge size for {0} to {1}'.format(dataType, self.conf._dataDictionary[dataType].mergeTargetSize)) + elif 'ALL' in self.conf._dataDictionary: + self.conf._dataDictionary['ALL'].mergeTargetSize = targetSize * 1000000 + msg.info('Set target merge size for {0} to {1} (from ALL value)'.format(dataType, self.conf._dataDictionary[dataType].mergeTargetSize)) + else: + self._athenaMPWorkerTopDir = self._athenaMPFileReport = None + + + # Check we actually have events to process! + if (self._inputEventTest and 'skipEvents' in self.conf.argdict and + self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) is not None): + msg.debug('Will test for events to process') + for dataType in input: + inputEvents = self.conf.dataDictionary[dataType].nentries + msg.debug('Got {0} events for {1}'.format(inputEvents, dataType)) + if not isinstance(inputEvents, (int, long)): + msg.warning('Are input events countable? 
Got nevents={0} so disabling event count check for this input'.format(inputEvents)) + elif self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) >= inputEvents: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_NOEVENTS'), + 'No events to process: {0} (skipEvents) >= {1} (inputEvents of {2}'.format(self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor), inputEvents, dataType)) + + ## Write the skeleton file and prep athena + if self._skeleton is not None: + inputFiles = dict() + for dataType in input: + inputFiles[dataType] = self.conf.dataDictionary[dataType] + outputFiles = dict() + for dataType in output: + outputFiles[dataType] = self.conf.dataDictionary[dataType] + + # See if we have any 'extra' file arguments + for dataType, dataArg in self.conf.dataDictionary.iteritems(): + if dataArg.io == 'input' and self._name in dataArg.executor: + inputFiles[dataArg.subtype] = dataArg + + msg.debug('Input Files: {0}; Output Files: {1}'.format(inputFiles, outputFiles)) + + # Get the list of top options files that will be passed to athena (=runargs file + all skeletons) + self._topOptionsFiles = self._jobOptionsTemplate.getTopOptions(input = inputFiles, + output = outputFiles) + + ## Add input/output file information - this can't be done in __init__ as we don't know what our + # inputs and outputs will be then + if len(input) > 0: + self._extraMetadata['inputs'] = list(input) + if len(output) > 0: + self._extraMetadata['outputs'] = list(output) + + ## Do we need to run asetup first? + asetupString = None + if 'asetup' in self.conf.argdict: + asetupString = self.conf.argdict['asetup'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) + else: + msg.info('Asetup report: {0}'.format(asetupReport())) + + ## DBRelease configuration + dbrelease = dbsetup = None + if 'DBRelease' in self.conf.argdict: + dbrelease = self.conf.argdict['DBRelease'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) + if dbrelease: + # Classic tarball - filename format is DBRelease-X.Y.Z.tar.gz + dbdMatch = re.match(r'DBRelease-([\d\.]+)\.tar\.gz', os.path.basename(dbrelease)) + if dbdMatch: + msg.debug('DBRelease setting {0} matches classic tarball file'.format(dbrelease)) + if not os.access(dbrelease, os.R_OK): + msg.warning('Transform was given tarball DBRelease file {0}, but this is not there'.format(dbrelease)) + msg.warning('I will now try to find DBRelease {0} in cvmfs'.format(dbdMatch.group(1))) + dbrelease = dbdMatch.group(1) + dbsetup = cvmfsDBReleaseCheck(dbrelease) + else: + # Check if the DBRelease is setup + unpacked, dbsetup = unpackDBRelease(tarball=dbrelease, dbversion=dbdMatch.group(1)) + if unpacked: + # Now run the setup.py script to customise the paths to the current location... 
+ setupDBRelease(dbsetup) + # For cvmfs we want just the X.Y.Z release string (and also support 'current') + else: + dbsetup = cvmfsDBReleaseCheck(dbrelease) + + ## Look for environment updates and perpare the athena command line + self._envUpdate = trfEnv.environmentUpdate() + self._envUpdate.setStandardEnvironment(self.conf.argdict, name=self.name, substep=self.substep) + self._prepAthenaCommandLine() + + + super(athenaExecutor, self).preExecute(input, output) + + # Now we always write a wrapper, because it's very convenient for re-running individual substeps + # This will have asetup and/or DB release setups in it + # Do this last in this preExecute as the _cmd needs to be finalised + msg.info('Now writing wrapper for substep executor {0}'.format(self._name)) + self._writeAthenaWrapper(asetup=asetupString, dbsetup=dbsetup) + msg.info('Athena will be executed in a subshell via {0}'.format(self._cmd)) + + + def postExecute(self): + super(athenaExecutor, self).postExecute() + + # If this was an athenaMP run then we need to update output files + if self._athenaMP: + if os.path.exists(self._athenaMPFileReport): + try: + try: + outputFileArgs = [ self.conf.dataDictionary[dataType] for dataType in self._output ] + except KeyError, e: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC'), + 'Failed to find output file argument instances for outputs {0} in {1}'.format(self.outData, self.name)) + mpOutputs = ElementTree.ElementTree() + mpOutputs.parse(self._athenaMPFileReport) + for filesElement in mpOutputs.getroot().getiterator(tag='Files'): + msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib)) + originalArg = None + originalName = filesElement.attrib['OriginalName'] + for fileArg in outputFileArgs: + if fileArg.value[0] == originalName: + originalArg = fileArg + break + if originalArg is None: + msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(originalName)) + continue + msg.debug('Found matching argument {0}'.format(originalArg)) + fileNameList = [] + for fileElement in filesElement.getiterator(tag='File'): + msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib)) + fileNameList.append(fileElement.attrib['name']) + # Now update argument with the new name list and reset metadata + originalArg.multipleOK = True + originalArg.value = fileNameList + originalArg.originalName = originalName + msg.debug('Argument {0} value now {1}'.format(originalArg, originalArg.value)) + # Merge? 
+                    if originalArg.io == 'output' and len(originalArg.value) > 1:
+                        msg.debug('{0} files {1} are candidates for smart merging'.format(originalArg.name, originalArg.value))
+                        self._smartMerge(originalArg)
+            except Exception, e:
+                msg.error('Exception thrown when processing athenaMP outputs report {0}: {1}'.format(self._athenaMPFileReport, e))
+                msg.error('Validation is now very likely to fail')
+                raise
+        else:
+            msg.warning('AthenaMP run was set to True, but no outputs file was found')
+
+        # If we have a perfmon file, get memory information
+        if self._perfMonFile:
+            try:
+                import PerfMonComps.PMonSD
+                info = PerfMonComps.PMonSD.parse(self._perfMonFile)
+                vmem_peak = int(info[0]['special']['values']['vmem_peak'])
+                vmem_mean = int(info[0]['special']['values']['vmem_mean'])
+                rss_mean = int(info[0]['special']['values']['rss_mean'])
+                self._memStats = {'vmemPeak': vmem_peak, 'vmemMean': vmem_mean, 'rssMean': rss_mean}
+                msg.debug('Found these memory stats from {0}: {1}'.format(self._perfMonFile, self._memStats))
+            except Exception, e:
+                msg.warning('Failed to process expected perfMon stats file {0}: {1}'.format(self._perfMonFile, e))
+
+
+    def validate(self):
+        self._hasValidated = True
+        deferredException = None
+
+        ## Our parent will check the RC for us
+        try:
+            super(athenaExecutor, self).validate()
+        except trfExceptions.TransformValidationException, e:
+            # In this case we hold this exception until the logfile has been scanned
+            msg.error('Validation of return code failed: {0!s}'.format(e))
+            deferredException = e
+
+        # Logfile scan setup
+        # Always use ignorePatterns from the command line
+        # For patterns in files, prefer the command line first, then any special settings for
+        # this executor, then fall back to the standard default (atlas_error_mask.db)
+        if 'ignorePatterns' in self.conf.argdict:
+            igPat = self.conf.argdict['ignorePatterns'].value
+        else:
+            igPat = []
+        if 'ignoreFiles' in self.conf.argdict:
+            ignorePatterns = trfValidation.ignorePatterns(files = self.conf.argdict['ignoreFiles'].value, extraSearch=igPat)
+        elif self._errorMaskFiles is not None:
+            ignorePatterns = trfValidation.ignorePatterns(files = self._errorMaskFiles, extraSearch=igPat)
+        else:
+            ignorePatterns = trfValidation.ignorePatterns(files = athenaExecutor._defaultIgnorePatternFile, extraSearch=igPat)
+
+        # Now actually scan my logfile
+        msg.info('Scanning logfile {0} for errors'.format(self._logFileName))
+        self._logScan = trfValidation.athenaLogFileReport(logfile = self._logFileName, ignoreList = ignorePatterns)
+        worstError = self._logScan.worstError()
+
+        # In general we add the error message to the exit message, but if it's too long then don't do
+        # that and just say look in the jobReport
+        if worstError['firstError']:
+            if len(worstError['firstError']['message']) > athenaExecutor._exitMessageLimit:
+                if 'CoreDumpSvc' in worstError['firstError']['message']:
+                    exitErrorMessage = "Core dump at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine'])
+                elif 'G4Exception' in worstError['firstError']['message']:
+                    exitErrorMessage = "G4 exception at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine'])
+                else:
+                    exitErrorMessage = "Long {0} message at line {1} (see jobReport for further details)".format(worstError['level'], worstError['firstError']['firstLine'])
+            else:
+                exitErrorMessage = "Logfile error in {0}: \"{1}\"".format(self._logFileName, worstError['firstError']['message'])
+        else:
+            exitErrorMessage = "Error level {0} found (see athena logfile for details)".format(worstError['level'])
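+        # (Editorial illustration, not part of the original patch) The worstError
+        # dict consumed above comes from athenaLogFileReport.worstError() and is
+        # shaped roughly like this (values invented):
+        #     {'level': 'FATAL', 'nLevel': stdLogLevels['FATAL'],
+        #      'firstError': {'message': 'CoreDumpSvc ... FATAL ...', 'firstLine': 1234}}
+        # A message longer than _exitMessageLimit (200 chars) is summarised by its
+        # line number rather than quoted in full.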
+
+        # If we failed on the rc, then abort now
+        if deferredException is not None:
+            # Add any logfile information we have
+            if worstError['nLevel'] >= stdLogLevels['ERROR']:
+                deferredException.errMsg = deferredException.errMsg + "; {0}".format(exitErrorMessage)
+            raise deferredException
+
+
+        # Very simple: if we get ERROR or worse, we're dead, except if ignoreErrors=True
+        if worstError['nLevel'] == stdLogLevels['ERROR'] and ('ignoreErrors' in self.conf.argdict and self.conf.argdict['ignoreErrors'].value is True):
+            msg.warning('Found ERRORs in the logfile, but ignoring this as ignoreErrors=True (see jobReport for details)')
+        elif worstError['nLevel'] >= stdLogLevels['ERROR']:
+            self._isValidated = False
+            msg.error('Fatal error in athena logfile (level {0})'.format(worstError['level']))
+            raise trfExceptions.TransformLogfileErrorException(trfExit.nameToCode('TRF_EXEC_LOGERROR'),
+                                                               'Fatal error in athena logfile: "{0}"'.format(exitErrorMessage))
+
+        # Must be ok if we got here!
+        msg.info('Executor {0} has validated successfully'.format(self.name))
+        self._isValidated = True
+
+
+    ## @brief Detect if AthenaMP is being used for this execution step
+    # @details Check environment and athena options
+    # Note that the special config option @c disableMP is used as an override
+    # so that we do not utilise AthenaMP for smart merging
+    # @return Tuple of two booleans: first is true if AthenaMPv2 is enabled, second is true
+    # if AthenaMPv1 is enabled
+    def _detectAthenaMP(self):
+        athenaMPVersion = 2
+
+        if self.conf._disableMP:
+            msg.debug('Executor configuration specified disabling AthenaMP')
+            return False, False
+
+        try:
+            # First try and detect if any AthenaMP has been enabled
+            if 'ATHENA_PROC_NUMBER' in os.environ and (int(os.environ['ATHENA_PROC_NUMBER']) != 0):
+                msg.info('Detected non-zero ATHENA_PROC_NUMBER ({0}) - setting athenaMP=True flag'.format(os.environ['ATHENA_PROC_NUMBER']))
+                athenaMPEnabled = True
+            elif 'athenaopts' in self.conf.argdict and len([opt for opt in self.conf.argdict['athenaopts'].value if '--nprocs' in opt]) > 0:
+                msg.info('Detected --nprocs argument for athena - setting athenaMP=True flag')
+                athenaMPEnabled = True
+            else:
+                athenaMPEnabled = False
+
+            # If AthenaMP has not been enabled, we don't care about the version
+            if not athenaMPEnabled:
+                msg.info('No AthenaMP options found - assuming normal athena run')
+                return False, False
+
+            # Now need to see if we're running with AthenaMP v1 or v2. In v1 AthenaMP
+            # handles all special merging and setup, so we ignore it. In v2 the
+            # transform takes an active role in smart merging and job setup.
+            # We signal AthenaMPv1 by returning False, True; v2 by True, False
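+            # (Editorial illustration, not part of the original patch) The
+            # detection rules above in practice ('exe' is hypothetical):
+            #     os.environ['ATHENA_PROC_NUMBER'] = '8'
+            #     exe._detectAthenaMP()   # -> (True, False) on a recent release
+            #     del os.environ['ATHENA_PROC_NUMBER']
+            #     exe._detectAthenaMP()   # -> (False, False) if no --nprocs given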
+            from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps
+            if 'Version' in dir(AthenaMPJobProps.AthenaMPFlags):
+                if AthenaMPJobProps.AthenaMPFlags.Version == 1:
+                    msg.info("AthenaMP properties indicates version 1 - no special AthenaMP processing will be done")
+                    return False, True
+            elif releaseIsOlderThan(17, 7):
+                msg.info("Release is older than 17.7, so assuming AthenaMP version 1 - no special AthenaMP processing will be done")
+                return False, True
+            return True, False
+
+        except ValueError:
+            msg.error('Could not understand ATHENA_PROC_NUMBER environment variable (int conversion failed)')
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Invalid ATHENA_PROC_NUMBER environment variable')
+        except OSError, e:
+            msg.error('Problem detecting the AthenaMP version: {0}'.format(e))
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error when detecting AthenaMP version')
+
+
+    ## @brief Prepare the correct command line to be used to invoke athena
+    def _prepAthenaCommandLine(self):
+        ## Start building up the command line
+        # N.B. it's possible we might have cases where 'athena' and 'athenaopt' should be substep args
+        # but at the moment this hasn't been requested.
+        if 'athena' in self.conf.argdict:
+            self._exe = self.conf.argdict['athena'].value
+        self._cmd = [self._exe]
+
+        # See if there's a preloadlibs and a request to update LD_PRELOAD for athena
+        if 'LD_PRELOAD' in self._envUpdate._envdict:
+            preLoadUpdated = False
+            if 'athenaopts' in self.conf.argdict:
+                for athArg in self.conf.argdict['athenaopts'].value:
+                    # This code is pretty ugly as the athenaopts argument contains
+                    # strings which are really key/value pairs
+                    if athArg.startswith('--preloadlib'):
+                        try:
+                            i = self.conf.argdict['athenaopts'].value.index(athArg)
+                            k, v = athArg.split('=', 1)
+                            msg.info('Updating athena --preloadlib option with: {0}'.format(self._envUpdate.value('LD_PRELOAD')))
+                            self.conf.argdict['athenaopts']._value[i] = '--preloadlib={0}:{1}'.format(self._envUpdate.value('LD_PRELOAD'), v)
+                        except Exception, e:
+                            msg.warning('Failed to interpret athena option: {0} ({1})'.format(athArg, e))
+                        preLoadUpdated = True
+                        break
+            if not preLoadUpdated:
+                msg.info('Setting athena preloadlibs to: {0}'.format(self._envUpdate.value('LD_PRELOAD')))
+                if 'athenaopts' in self.conf.argdict:
+                    self.conf.argdict['athenaopts'].append("--preloadlib={0}".format(self._envUpdate.value('LD_PRELOAD')))
+                else:
+                    self.conf.argdict['athenaopts'] = trfArgClasses.argList(["--preloadlib={0}".format(self._envUpdate.value('LD_PRELOAD'))])
+
+        # Now update command line with the options we have (including any changes to preload)
+        if 'athenaopts' in self.conf.argdict:
+            self._cmd.extend(self.conf.argdict['athenaopts'].value)
+
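+        # (Editorial illustration, not part of the original patch) Example of the
+        # preload merging above: if the environment update computes an LD_PRELOAD
+        # value of /some/path/libimf.so (path hypothetical) and the user passed
+        # --preloadlib=libtcmalloc_minimal.so, the rewritten athena option becomes
+        #     --preloadlib=/some/path/libimf.so:libtcmalloc_minimal.so
+        # i.e. the transform's additions are prefixed to the user's own value.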
+        if self._tryDropAndReload:
+            if self._athenaMPv1:
+                msg.info('Disabling "--drop-and-reload" because the job is configured to use AthenaMP v1')
+            elif 'athenaopts' in self.conf.argdict:
+                athenaConfigRelatedOpts = ['--config-only','--drop-and-reload','--drop-configuration','--keep-configuration']
+                # Note for athena options we split on '=' so that we properly get the option and not the whole "--option=value" string
+                conflictOpts = set(athenaConfigRelatedOpts).intersection(set([opt.split('=')[0] for opt in self.conf.argdict['athenaopts'].value]))
+                if len(conflictOpts) > 0:
+                    msg.info('Not appending "--drop-and-reload" to athena command line because these options conflict: {0}'.format(list(conflictOpts)))
+                else:
+                    msg.info('Appending "--drop-and-reload" to athena options')
+                    self._cmd.append('--drop-and-reload')
+            else:
+                # This is the 'standard' case - so drop and reload should be ok
+                msg.info('Appending "--drop-and-reload" to athena options')
+                self._cmd.append('--drop-and-reload')
+        else:
+            msg.info('Skipping test for "--drop-and-reload" in this executor')
+
+        # Add topoptions
+        if self._skeleton is not None:
+            self._cmd += self._topOptionsFiles
+            msg.info('Updated script arguments with topoptions: %s' % self._cmd)
+
+
+    ## @brief Write a wrapper script which runs asetup and then athena
+    def _writeAthenaWrapper(self, asetup=None, dbsetup=None):
+        self._originalCmd = self._cmd
+        self._asetup = asetup
+        self._dbsetup = dbsetup
+        self._wrapperFile = 'runwrapper.{0}.sh'.format(self._name)
+        msg.debug('Preparing wrapper file {0} with asetup={1} and dbsetup={2}'.format(self._wrapperFile, self._asetup, self._dbsetup))
+        try:
+            with open(self._wrapperFile, 'w') as wrapper:
+                print >>wrapper, '#! /bin/sh'
+                if asetup:
+                    print >>wrapper, "# asetup"
+                    print >>wrapper, 'echo Sourcing {0}/scripts/asetup.sh {1}'.format(os.environ['AtlasSetup'], asetup)
+                    print >>wrapper, 'source {0}/scripts/asetup.sh {1}'.format(os.environ['AtlasSetup'], asetup)
+                    print >>wrapper, 'if [ $? != "0" ]; then exit 255; fi'
!= "0" ]; then exit 255; fi' + if dbsetup: + dbroot = os.path.dirname(dbsetup) + dbversion = os.path.basename(dbroot) + print >>wrapper, "# DBRelease setup" + print >>wrapper, 'echo Setting up DBRelease {0} environment'.format(dbroot) + print >>wrapper, 'export DBRELEASE={0}'.format(dbversion) + print >>wrapper, 'export CORAL_AUTH_PATH={0}'.format(os.path.join(dbroot, 'XMLConfig')) + print >>wrapper, 'export CORAL_DBLOOKUP_PATH={0}'.format(os.path.join(dbroot, 'XMLConfig')) + print >>wrapper, 'export TNS_ADMIN={0}'.format(os.path.join(dbroot, 'oracle-admin')) + print >>wrapper, 'DATAPATH={0}:$DATAPATH'.format(dbroot) + if self.conf._disableMP: + print >>wrapper, "# AthenaMP explicitly disabled for this executor" + print >>wrapper, "unset ATHENA_PROC_NUMBER" + if self._envUpdate.len > 0: + print >>wrapper, "# Customised environment" + for envSetting in self._envUpdate.values: + if not envSetting.startswith('LD_PRELOAD'): + print >>wrapper, "export", envSetting + print >>wrapper, ' '.join(self._cmd) + os.chmod(self._wrapperFile, 0755) + except (IOError, OSError) as e: + errMsg = 'Got an error when writing athena wrapper {0}: {1}'.format(self._wrapperFile, e) + msg.error(errMsg) + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), errMsg) + self._cmd = [os.path.join('.', self._wrapperFile)] + + + ## @brief Manage smart merging of output files + # @param fileArg File argument to merge + def _smartMerge(self, fileArg): + ## @note Produce a list of merge jobs - this is a list of lists + # @todo This should be configurable! + # @note Value is set very low for now for testing + + ## @note only file arguments which support selfMerge() can be merged + if 'selfMerge' not in dir(fileArg): + msg.info('Files in {0} cannot merged (no selfMerge() method is implemented)'.format(fileArg.name)) + return + + if fileArg.mergeTargetSize == 0: + msg.info('Files in {0} will not be merged as target size is set to 0)'.format(fileArg.name)) + return + + + mergeCandidates = [list()] + currentMergeSize = 0 + for file in fileArg.value: + size = fileArg.getSingleMetadata(file, 'file_size') + if type(size) not in (int, long): + msg.warning('File size metadata for {0} was not correct, found type {1}. Aborting merge attempts.'.format(fileArg, type(size))) + return + # if there is no file in the job, then we must add it + if len(mergeCandidates[-1]) == 0: + msg.debug('Adding file {0} to current empty merge list'.format(file)) + mergeCandidates[-1].append(file) + currentMergeSize += size + continue + # see if adding this file gets us closer to the target size (but always add if target size is negative) + if fileArg.mergeTargetSize < 0 or math.fabs(currentMergeSize + size - fileArg.mergeTargetSize) < math.fabs(currentMergeSize - fileArg.mergeTargetSize): + msg.debug('Adding file {0} to merge list {1} as it gets closer to the target size'.format(file, mergeCandidates[-1])) + mergeCandidates[-1].append(file) + currentMergeSize += size + continue + # close this merge list and start a new one + msg.debug('Starting a new merge list with file {0}'.format(file)) + mergeCandidates.append([file]) + currentMergeSize = size + + msg.debug('First pass splitting will merge files in this way: {0}'.format(mergeCandidates)) + + counter = 0 + for mergeGroup in mergeCandidates: + counter += 1 + # If we only have one merge group, then preserve the original name (important for + # prodsys v1). Otherwise we use the new merged names. 
+            if len(mergeCandidates) == 1:
+                mergeName = fileArg.originalName
+            else:
+                mergeName = fileArg.originalName + '.merge.{0}'.format(counter)
+            msg.info('Want to merge files {0} to {1}'.format(mergeGroup, mergeName))
+            if len(mergeGroup) <= 1:
+                msg.info('Skip merging for single file')
+            else:
+                ## We want to parallelise this part!
+                fileArg.selfMerge(output=mergeName, inputs=mergeGroup, argdict=self.conf.argdict)
+
+
+class hybridPOOLMergeExecutor(athenaExecutor):
+    ## @brief Initialise hybrid POOL merger athena executor
+    #  @param name Executor name
+    #  @param trf Parent transform
+    #  @param skeletonFile athena skeleton job options file
+    #  @param exe Athena execution script
+    #  @param exeArgs Transform argument names whose value is passed to athena
+    #  @param substep The athena substep this executor represents
+    #  @param inputEventTest Boolean switching the skipEvents < inputEvents test
+    #  @param perfMonFile Name of perfmon file for this substep (used to retrieve vmem/rss information)
+    #  @param tryDropAndReload Boolean switch for the attempt to add '--drop-and-reload' to athena args
+    #  @param hybridMerge Boolean activating hybrid merger (if set to 'None' then the hybridMerge will
+    #  be used if n_inputs <= 16, otherwise a classic merge will happen for better downstream i/o
+    #  performance)
+    def __init__(self, name = 'hybridPOOLMerge', trf = None, conf = None, skeletonFile = 'RecJobTransforms/skeleton.MergePool_tf.py', inData = set(),
+                 outData = set(), exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True,
+                 perfMonFile = None, tryDropAndReload = True, hybridMerge = None, extraRunargs = {},
+                 manualDataDictionary = None):
+
+        # By default we will do a hybridMerge
+        self._hybridMerge = hybridMerge
+        self._hybridMergeTmpFile = 'events.pool.root'
+        super(hybridPOOLMergeExecutor, self).__init__(name, trf=trf, conf=conf, skeletonFile=skeletonFile, inData=inData,
+                                                      outData=outData, exe=exe, exeArgs=exeArgs, substep=substep,
+                                                      inputEventTest=inputEventTest, perfMonFile=perfMonFile,
+                                                      tryDropAndReload=tryDropAndReload, extraRunargs=extraRunargs,
+                                                      manualDataDictionary=manualDataDictionary)
+
+    def preExecute(self, input = set(), output = set()):
+        # Now check to see if the fastPoolMerge option was set
+        if 'fastPoolMerge' in self.conf.argdict:
+            msg.info('Setting hybrid merge to {0}'.format(self.conf.argdict['fastPoolMerge'].value))
+            self._hybridMerge = self.conf.argdict['fastPoolMerge'].value
+        else:
+            # If not, use the "automatic" setting - only do a hybrid merge if we have <= 16 input files
+            inFiles = len(self.conf.dataDictionary[list(input)[0]].value)
+            if inFiles > 16:
+                msg.info("Hybrid merging is disabled as there are {0} input files (>16)".format(inFiles))
+                self._hybridMerge = False
+            else:
+                msg.info("Hybrid merging is activated for {0} input files".format(inFiles))
+                self._hybridMerge = True
+            # Need to add this as a runarg for the skeleton to execute properly
+            self.conf.addToArgdict('fastPoolMerge', trfArgClasses.argBool(True))
+
+        if self._hybridMerge:
+            # If hybridMerge is activated then we process no events at the athena step,
+            # so set a ridiculous skipEvents value
+            self._extraRunargs.update({'skipEvents': 1000000})
+
+        super(hybridPOOLMergeExecutor, self).preExecute(input=input, output=output)
+
+
+    def execute(self):
+        # First call the parent executor, which will manage the athena execution for us
+        super(hybridPOOLMergeExecutor, self).execute()
+
+        # Now, do we need to do the fast event merge?
+        if not self._hybridMerge:
+            return
+
+        # Save the stub file for debugging...
+        stubFile = self.conf.dataDictionary[list(self._output)[0]].value[0]
+        stubFileSave = stubFile + ".tmp"
+        msg.info('Saving metadata stub file {0} to {1}'.format(stubFile, stubFileSave))
+        shutil.copy(stubFile, stubFileSave)
+
+        # Now do the hybrid merge steps - note we disable checkEventCount for this - it doesn't make sense here
+        fastConf = copy.copy(self.conf)
+        fastConf.addToArgdict('checkEventCount', trfArgClasses.argSubstepBool("all:False", runarg=False))
+        fastEventMerge1 = scriptExecutor(name='fastEventMerge_step1', conf=fastConf, inData=self._inData, outData=self._outData,
+                                         exe='mergePOOL.exe', exeArgs=None)
+        fastEventMerge1._cmd = ['mergePOOL.exe', '-o', self._hybridMergeTmpFile]
+        for file in self.conf.dataDictionary[list(self._input)[0]].value:
+            fastEventMerge1._cmd.extend(['-i', file])
+        fastEventMerge1._cmd.extend(['-e', 'MetaData', '-e', 'MetaDataHdrDataHeaderForm', '-e', 'MetaDataHdrDataHeader', '-e', 'MetaDataHdr'])
+
+        msg.debug('Constructed this command line for fast event merge step 1: {0}'.format(fastEventMerge1._cmd))
+        fastEventMerge1.doAll()
+
+
+        fastEventMerge2 = scriptExecutor(name='fastEventMerge_step2', conf=fastConf, inData=self._inData, outData=self._outData,
+                                         exe='mergePOOL.exe', exeArgs=None)
+        fastEventMerge2._cmd = ['mergePOOL.exe', '-o', self._hybridMergeTmpFile]
+        fastEventMerge2._cmd.extend(['-i', self.conf.dataDictionary[list(self._output)[0]].value[0]])
+
+        msg.debug('Constructed this command line for fast event merge step 2: {0}'.format(fastEventMerge2._cmd))
+        fastEventMerge2.doAll()
+
+        # Ensure we count all the mergePOOL.exe stuff in the resource report
+        self._exeStop = os.times()
+
+        # And finally...
+        msg.info('Renaming {0} to {1}'.format(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0]))
+        try:
+            os.rename(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0])
+            self.conf.dataDictionary[list(self._output)[0]]._resetMetadata()
+            # The PoolFileCatalog now has the wrong GUID for the output file, so delete it for safety
+            if os.access('PoolFileCatalog.xml', os.R_OK):
+                os.unlink('PoolFileCatalog.xml')
+        except (IOError, OSError) as e:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
+                                                            'Exception raised when renaming {0} to {1}: {2}'.format(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0], e))
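
The two mergePOOL.exe invocations above can be condensed into a short sketch; this is illustrative only (subprocess usage and the helper name are invented, not the executor's own mechanism, which runs through scriptExecutor):

import subprocess

def hybridMerge(inputs, stub, tmp='events.pool.root'):
    # Step 1: merge the event payload of all inputs, excluding the metadata
    # containers (they are taken from the athena-produced stub instead)
    cmd = ['mergePOOL.exe', '-o', tmp]
    for f in inputs:
        cmd += ['-i', f]
    cmd += ['-e', 'MetaData', '-e', 'MetaDataHdrDataHeaderForm',
            '-e', 'MetaDataHdrDataHeader', '-e', 'MetaDataHdr']
    subprocess.check_call(cmd)
    # Step 2: fold the metadata stub produced by athena into the merged file
    subprocess.check_call(['mergePOOL.exe', '-o', tmp, '-i', stub])

+## @brief Specialist executor to manage the handling of multiple implicit input
+#  and output files within the reduction framework.
+class reductionFrameworkExecutor(athenaExecutor):
+
+    ## @brief Take inputDAODFile and setup the actual outputs needed
+    #  in this job.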
+    def preExecute(self, input=set(), output=set()):
+        msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))
+
+        if 'reductionConf' not in self.conf.argdict:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'),
+                                                            'No reduction configuration specified')
+        if 'DAOD' not in output:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'),
+                                                            'No base name for DAOD reduction')
+
+        for reduction in self.conf.argdict['reductionConf'].value:
+            dataType = 'DAOD_' + reduction
+            outputName = 'DAOD_' + reduction + '_' + self.conf.argdict['outputDAODFile'].value[0]
+            msg.info('Adding reduction output type {0}'.format(dataType))
+            output.add(dataType)
+            newReduction = trfArgClasses.argPOOLFile(outputName, io='output', runarg=True, type='aod',
+                                                     name=reduction)
+            # References to _trf - can this be removed?
+            self.conf.dataDictionary[dataType] = newReduction
+
+        # Clean up the stub file from the executor input and the transform's data dictionary
+        # (we don't remove the actual argFile instance)
+        output.remove('DAOD')
+        del self.conf.dataDictionary['DAOD']
+        del self.conf.argdict['outputDAODFile']
+
+        msg.info('Data dictionary is now: {0}'.format(self.conf.dataDictionary))
+        msg.info('Input/Output: {0}/{1}'.format(input, output))
+
+        super(reductionFrameworkExecutor, self).preExecute(input, output)
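
The expansion of reductionConf into concrete outputs follows a simple naming rule; a minimal sketch, with an invented helper name and an invented reduction tag used purely as an example:

def expandReductions(reductions, baseName):
    # One output entry per configured reduction, e.g. with reductions
    # ['TOPQ1'] and baseName 'DAOD.pool.root' this yields
    # {'DAOD_TOPQ1': 'DAOD_TOPQ1_DAOD.pool.root'}
    out = {}
    for red in reductions:
        dataType = 'DAOD_' + red
        out[dataType] = dataType + '_' + baseName
    return out

+## @brief Specialist executor to manage the handling of multiple implicit input
+#  and output files within the reduction framework.
+#  @note This is the temporary executor used for NTUP->DNTUP. It will be dropped
+#  after the move to D(x)AOD.
+class reductionFrameworkExecutorNTUP(athenaExecutor):
+
+    ## @brief Take inputDNTUPFile and setup the actual outputs needed
+    #  in this job.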
+    def preExecute(self, input=set(), output=set()):
+        msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))
+
+        if 'reductionConf' not in self.conf.argdict:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'),
+                                                            'No reduction configuration specified')
+        if 'DNTUP' not in output:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'),
+                                                            'No base name for DNTUP reduction')
+
+        for reduction in self.conf.argdict['reductionConf'].value:
+            dataType = 'DNTUP_' + reduction
+            # Prodsys 1 request - don't add a suffix, but replace DNTUP with DNTUP_TYPE
+            outputName = self.conf.argdict['outputDNTUPFile'].value[0].replace('DNTUP', dataType)
+            if outputName == self.conf.argdict['outputDNTUPFile'].value[0]:
+                # Rename according to the old scheme
+                outputName = self.conf.argdict['outputDNTUPFile'].value[0] + '_' + reduction + '.root'
+            msg.info('Adding reduction output type {0}, target filename {1}'.format(dataType, outputName))
+            output.add(dataType)
+            newReduction = trfArgClasses.argNTUPFile(outputName, io='output', runarg=True, type='NTUP', subtype=dataType,
+                                                     name=reduction, treeNames=['physics'])
+            self.conf.dataDictionary[dataType] = newReduction
+
+        # Clean up the stub file from the executor input and the transform's data dictionary
+        # (we don't remove the actual argFile instance)
+        output.remove('DNTUP')
+        del self.conf.dataDictionary['DNTUP']
+        del self.conf.argdict['outputDNTUPFile']
+
+        msg.info('Data dictionary is now: {0}'.format(self.conf.dataDictionary))
+        msg.info('Input/Output: {0}/{1}'.format(input, output))
+
+        super(reductionFrameworkExecutorNTUP, self).preExecute(input, output)
+
+
+## @brief Specialist execution class for merging DQ histograms
+class DQMergeExecutor(scriptExecutor):
+    def __init__(self, name='DQHistMerge', trf=None, conf=None, inData=set(['HIST_AOD', 'HIST_ESD']), outData=set(['HIST']),
+                 exe='DQHistogramMerge.py', exeArgs = []):
+
+        self._histMergeList = 'HISTMergeList.txt'
+
+        super(DQMergeExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData, exe=exe, exeArgs=exeArgs)
+
+
+    def preExecute(self, input = set(), output = set()):
+        msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))
+
+        super(DQMergeExecutor, self).preExecute(input=input, output=output)
+
+        # Write the list of files to be merged
+        with open(self._histMergeList, 'w') as DQMergeFile:
+            for dataType in input:
+                for file in self.conf.dataDictionary[dataType].value:
+                    self.conf.dataDictionary[dataType]._getNumberOfEvents([file])
+                    print >>DQMergeFile, file
+
+        self._cmd.append(self._histMergeList)
+
+        # Add the output file
+        if len(output) != 1:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'),
+                                                            'One (and only one) output file must be given to {0} (got {1})'.format(self.name, len(output)))
+        outDataType = list(output)[0]
+        self._cmd.append(self.conf.dataDictionary[outDataType].value[0])
+
+        # Set the run_post_processing option to False
+        self._cmd.append('False')
+
+
+## @brief Specialist execution class for merging NTUPLE files
+class NTUPMergeExecutor(scriptExecutor):
+
+    def preExecute(self, input = set(), output = set()):
+        msg.debug('[NTUP] Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))
+
+        # Basic command, and allow overwrite of the output file
+        if self._exe is None:
+            self._exe = 'hadd'
+        self._cmd = [self._exe, "-f"]
+
+        # Add the output file
+        if len(output) != 1:
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'),
+                                                            'One (and only one) output file must be given to {0} (got {1})'.format(self.name, len(output)))
+        outDataType = list(output)[0]
+        self._cmd.append(self.conf.dataDictionary[outDataType].value[0])
+        # Add the files to be merged to the command chain
+        for dataType in input:
+            self._cmd.extend(self.conf.dataDictionary[dataType].value)
+
+        super(NTUPMergeExecutor, self).preExecute(input=input, output=output)
+
+## @brief Specialise the athena executor to deal with the BS merge oddity of excluding empty DRAWs
+class bsMergeExecutor(scriptExecutor):
+
+    def preExecute(self, input = set(), output = set()):
+        self._maskedFiles = []
+        if 'BS' in self.conf.argdict and 'maskEmptyInputs' in self.conf.argdict and self.conf.argdict['maskEmptyInputs'].value is True:
+            eventfullFiles = []
+            for file in self.conf.dataDictionary['BS'].value:
+                nEvents = self.conf.dataDictionary['BS'].getSingleMetadata(file, 'nentries')
+                msg.debug('Found {0} events in file {1}'.format(nEvents, file))
+                if isinstance(nEvents, int) and nEvents > 0:
+                    eventfullFiles.append(file)
+            self._maskedFiles = list(set(self.conf.dataDictionary['BS'].value) - set(eventfullFiles))
+            if len(self._maskedFiles) > 0:
+                msg.info('The following input files are masked because they have 0 events: {0}'.format(' '.join(self._maskedFiles)))
+                if len(eventfullFiles) == 0:
+                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'),
+                                                                    'All input files had zero events - aborting BS merge')
+
+        # Write the list of input files to a text file, so that testMergedFiles can swallow it
+        self._mergeBSFileList = '{0}.list'.format(self._exe)
+        self._mergeBSLogfile = '{0}.out'.format(self._exe)
+        try:
+            with open(self._mergeBSFileList, 'w') as BSFileList:
+                for file in self.conf.dataDictionary['BS'].value:
+                    if file not in self._maskedFiles:
+                        print >>BSFileList, file
+        except (IOError, OSError) as e:
+            errMsg = 'Got an error when writing list of BS files to {0}: {1}'.format(self._mergeBSFileList, e)
+            msg.error(errMsg)
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), errMsg)
+
+        # Hope that we were given a correct filename...
+        self._outputFilename = self.conf.dataDictionary['BS_MRG'].value[0]
+        if self._outputFilename.endswith('._0001.data'):
+            self._doRename = False
+            self._outputFilename = self._outputFilename.split('._0001.data')[0]
+        elif self.conf.argdict['allowRename'].value == True:
+            # OK, non-fatal, we go for a renaming
+            msg.warning('Output filename does not end in "._0001.data"; will proceed, but be aware that the internal filename metadata will be wrong')
+            self._doRename = True
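
The "._0001.data" handling above encodes a simple convention (the BS writer appends that suffix itself); a small illustrative sketch, with an invented helper name and omitting the allowRename guard:

def splitMergeName(outputName):
    # If the requested name already carries the suffix, strip it and write
    # directly; otherwise plan to rename the produced file afterwards.
    suffix = '._0001.data'
    if outputName.endswith(suffix):
        return outputName[:-len(suffix)], False   # base name, no rename needed
    return outputName, True                       # write, then rename

+        else:
+            # No rename allowed, so we are dead...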
+            errmsg = 'Output filename for outputBS_MRGFile must end in "._0001.data" or infile metadata will be wrong'
+            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), errmsg)
+
+        # Set the correct command for execution
+        self._cmd = [self._exe, self._mergeBSFileList, '0', self._outputFilename]
+
+        super(bsMergeExecutor, self).preExecute(input=input, output=output)
+
+
+    def postExecute(self):
+        if self._doRename:
+            self._expectedOutput = self._outputFilename + '._0001.data'
+            msg.info('Renaming {0} to {1}'.format(self._expectedOutput, self.conf.dataDictionary['BS_MRG'].value[0]))
+            try:
+                os.rename(self._outputFilename + '._0001.data', self.conf.dataDictionary['BS_MRG'].value[0])
+            except OSError, e:
+                raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
+                                                                'Exception raised when renaming {0} to {1}: {2}'.format(self._outputFilename, self.conf.dataDictionary['BS_MRG'].value[0], e))
+        super(bsMergeExecutor, self).postExecute()
+
+
+
+class tagMergeExecutor(scriptExecutor):
+
+    def preExecute(self, input = set(), output = set()):
+        # Just need to write the customised CollAppend command line
+        self._cmd = [self._exe, '-src']
+        for dataType in input:
+            for file in self.conf.dataDictionary[dataType].value:
+                self._cmd.extend(['PFN:{0}'.format(file), 'RootCollection'])
+        self._cmd.extend(['-dst', 'PFN:{0}'.format(self.conf.dataDictionary[list(output)[0]].value[0]), 'RootCollection', '-nevtcached', '5000'])
+
+        # In AthenaMP jobs the output file can be created empty, which CollAppend does not like, so remove it
+        if os.access(self.conf.dataDictionary[list(output)[0]].value[0], os.F_OK):
+            os.unlink(self.conf.dataDictionary[list(output)[0]].value[0])
+
+        super(tagMergeExecutor, self).preExecute(input=input, output=output)
+
+
+    def validate(self):
+        super(tagMergeExecutor, self).validate()
+
+        # Now scan the logfile...
+        try:
+            msg.debug('Scanning TAG merging logfile {0}'.format(self._logFileName))
+            with open(self._logFileName) as logfile:
+                for line in logfile:
+                    # Errors are signaled by 'error' (case independent) and NOT ('does not exist' or 'hlterror')
+                    # Logic copied from Tier 0 TAGMerge_trf.py
+                    if 'error' in line.lower():
+                        if 'does not exist' in line:
+                            continue
+                        if 'hlterror' in line:
+                            continue
+                        raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_LOGERROR'),
+                                                                         'Found this error message in the logfile {0}: {1}'.format(self._logFileName, line))
+        except (OSError, IOError) as e:
+            raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_LOGERROR'),
+                                                             'Exception raised while attempting to scan logfile {0}: {1}'.format(self._logFileName, e))
+
+
+## @brief Archive transform - use tar
+class archiveExecutor(scriptExecutor):
+
+    def preExecute(self, input = set(), output = set()):
+        # Set the correct command for execution
+        self._cmd = [self._exe, '-c', '-v',]
+        if 'compressionType' in self.conf.argdict.keys():
+            if self.conf.argdict['compressionType'].value == 'gzip':
+                self._cmd.append('-z')
+            elif self.conf.argdict['compressionType'].value == 'bzip2':
+                self._cmd.append('-j')
+            elif self.conf.argdict['compressionType'].value == 'none':
+                pass
+        self._cmd.extend(['-f', self.conf.argdict['outputArchFile'].value[0]])
+        self._cmd.extend(self.conf.argdict['inputDataFile'].value)
+
+        super(archiveExecutor, self).preExecute(input=input, output=output)
+
diff --git a/Tools/PyJobTransforms/python/trfExitCodes.py b/Tools/PyJobTransforms/python/trfExitCodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..4118786389612f9ebe34616de1eb8218aa06a515
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfExitCodes.py
@@ -0,0 +1,179 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfExitCodes
+#
+# @brief Module for transform exit codes
+# @details Define all transform exit codes with their acronyms
+# @remarks Usual usage is to import @c trfExit from this module
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfExitCodes.py 609252 2014-07-29 16:20:33Z wbreaden $
+#
+
+import signal
+import unittest
+
+import logging
+msg = logging.getLogger(__name__)
+
+## @brief Little class to hold the three attributes of a transform error
+class trfExitCode(object):
+
+    ## @brief Exit code class instantiation
+    #  @param acronymn The internal transform acronym for this error code
+    #  @param numcode The numerical exit code for this error
+    #  @param description A human comprehensible description of this error's meaning
+    def __init__(self, acronymn, numcode, description):
+        self._acronymn = acronymn
+        self._numcode = numcode
+        self._description = description
+
+    @property
+    def acronymn(self):
+        return self._acronymn
+
+    @property
+    def numcode(self):
+        return self._numcode
+
+    @property
+    def description(self):
+        return self._description
+
+## @note Restructured exit codes to avoid clashes with the recognised exit codes from
+#  the old transforms. These are:
+#    self.errorCodes['transExitCode'][1] = 'Unspecified error, consult log file'
+#    self.errorCodes['transExitCode'][2] = 'Payload core dump'
+#    self.errorCodes['transExitCode'][6] = 'TRF_SEGVIO - Segmentation violation'
+#    self.errorCodes['transExitCode'][10] = 'ATH_FAILURE - Athena non-zero exit'
+#    self.errorCodes['transExitCode'][26] = 'TRF_ATHENACRASH - Athena crash'
+#    self.errorCodes['transExitCode'][30] = 'TRF_PYT - transformation python error'
+#    self.errorCodes['transExitCode'][31] = 'TRF_ARG - transformation argument error'
+#    self.errorCodes['transExitCode'][32] = 'TRF_DEF - transformation definition error'
+#    self.errorCodes['transExitCode'][33] = 'TRF_ENV - transformation environment error'
+#    self.errorCodes['transExitCode'][34] = 'TRF_EXC - transformation exception'
+#    self.errorCodes['transExitCode'][40] = 'Athena crash - consult log file'
+#    self.errorCodes['transExitCode'][41] = 'TRF_OUTFILE - output file error'
+#    self.errorCodes['transExitCode'][42] = 'TRF_CONFIG - transform config file error'
+#    self.errorCodes['transExitCode'][50] = 'TRF_DB - problems with database'
+#    self.errorCodes['transExitCode'][51] = 'TRF_DBREL_TARFILE - Problems with the DBRelease tarfile'
+#    self.errorCodes['transExitCode'][60] = 'TRF_GBB_TIME - GriBB - output limit exceeded (time, memory, CPU)'
+#    self.errorCodes['transExitCode'][79] = 'Copying input file failed'
+#    self.errorCodes['transExitCode'][80] = 'file in trf definition not found, using the expandable syntax'
+#    self.errorCodes['transExitCode'][81] = 'file in trf definition not found, using the expandable syntax -- pileup case'
+#    self.errorCodes['transExitCode'][85] = 'analysis output merge crash - consult log file'
+#    self.errorCodes['transExitCode'][98] = 'Oracle error - session limit reached'
+#    self.errorCodes['transExitCode'][99] = 'TRF_UNKNOWN - unknown transformation error'
+#    self.errorCodes['transExitCode'][102] = 'One of the output files did not get produced by the job'
+#    self.errorCodes['transExitCode'][104] = 'Copying the output file to local SE failed (md5sum or size mismatch, or LFNnonunique)'
+#    self.errorCodes['transExitCode'][126] = 'Transformation not executable - consult log file'
+#    self.errorCodes['transExitCode'][127] = 'Transformation not installed in CE'
+#    self.errorCodes['transExitCode'][134] = 'Athena core dump or timeout, or conddb DB connect exception'
+#    self.errorCodes['transExitCode'][141] = "No input file available - check availability of input dataset at site"
+#    self.errorCodes['transExitCode'][200] = 'Log file not transferred to destination'
+#    self.errorCodes['transExitCode'][220] = 'Proot: An exception occurred in the user analysis code'
+#    self.errorCodes['transExitCode'][221] = 'Proot: Framework decided to abort the job due to an internal problem'
+#    self.errorCodes['transExitCode'][222] = 'Proot: Job completed without reading all input files'
+#    self.errorCodes['transExitCode'][223] = 'Proot: Input files cannot be opened'
+#  Taken from: https://svnweb.cern.ch/trac/panda/browser/monitor/ErrorCodes.py?rev=13963
+#
+#  Because of this restructuring the top bits categorisation of the error codes is no longer maintained.
+#
+#  Further error codes to avoid are:
+#    ERROR_OPEN_FAILED=232
+#    ERROR_HIST_MISSING=233
+#    ERROR_SSB_DIRECT=235
+#    ERROR_SSB_DIRECT_DOWN=236
+#  These are being used by the FAX analysis wrapper
+#
+# @note In certain circumstances a transform may catch a signal
+# and exit more cleanly (SIGINT, SIGUSR1), in which case
+# it will replicate the shell signaled exit code (128 + SIGNUM).
+# These are mapped dynamically to k/v TRF_SIG_SIGNAME : SIGNUM+128.
+# This is partly because numeric codes vary between platforms
+# (e.g., SIGUSR1 = 10 (Linux) or 30 (OS X)), but it's also more
+# reliable than typing it all by hand!
+class trfExitCodes(object):
+    ## @note Hold error codes in a list of trfExitCode objects
+    _errorCodeList = list()
+    _errorCodeList.append(trfExitCode('OK', 0, 'Successful exit'))
+    _errorCodeList.append(trfExitCode('TRF_SETUP', 3, 'Transform setup error'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_CONV_FAIL', 4, 'Failure to convert transform arguments to correct type'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_OUT_OF_RANGE', 5, 'Argument out of allowed range'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_ERROR', 7, 'Problem with an argument given to the transform'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_DATASET', 8, 'Inconsistent dataset value in input file list'))
+    _errorCodeList.append(trfExitCode('TRF_INPUT_FILE_ERROR', 9, 'Error found with transform input file'))
+    _errorCodeList.append(trfExitCode('TRF_OUTPUT_FILE_ERROR', 11, 'Error when handling transform output file'))
+    _errorCodeList.append(trfExitCode('TRF_GRAPH_ERROR', 12, 'Problem in setting up the substep graph'))
+    _errorCodeList.append(trfExitCode('TRF_EXECUTION_PATH_ERROR', 13, 'Problem trying to generate requested outputs from given inputs - graph gives no path to do this'))
+    _errorCodeList.append(trfExitCode('TRF_KEY_ERROR', 14, 'Key error in a transform internal dictionary'))
+    _errorCodeList.append(trfExitCode('TRF_NOEVENTS', 15, 'No events to be processed in the input file - skipEvents is higher than actual event number'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_CHOICES_FAIL', 16, 'Invalid selection in a choice restricted argument'))
+    _errorCodeList.append(trfExitCode('TRF_ARG_MISSING', 17, 'Mandatory argument is missing'))
+    _errorCodeList.append(trfExitCode('TRF_REDUCTION_CONFIG_ERROR', 18, 'Invalid configuration of a reduction job'))
+    _errorCodeList.append(trfExitCode('TRF_GRAPH_STEERING_ERROR', 19, 'Problem when trying to apply steering to the substep graph'))
+    _errorCodeList.append(trfExitCode('TRF_INTERNAL_REPORT_ERROR', 35, 'Internal error while generating transform reports'))
+    _errorCodeList.append(trfExitCode('TRF_METADATA_CALL_FAIL', 36, 'Call to external metadata program failed'))
+    _errorCodeList.append(trfExitCode('TRF_RUNTIME_ERROR', 37, 'General runtime error'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_VALIDATION_EVENTCOUNT', 38, 'Event count validation failed'))
+    _errorCodeList.append(trfExitCode('TRF_DBRELEASE_PROBLEM', 39, 'Failed to setup DBRelease properly'))
+    _errorCodeList.append(trfExitCode('TRF_FILEMERGE_PROBLEM', 52, 'Problem while attempting to merge output files'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC', 64, 'General failure in transform substep executor'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_FAIL', 65, 'Non-zero exit code from transform substep executor'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_VALIDATION_FAIL', 66, 'Validation failure in transform substep executor'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_TIMEOUT', 67, 'Transform substep executor timed out'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_LOGERROR', 68, 'Errors found in substep executor logfile'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_SETUP_FAIL', 69, 'Transform substep executor setup failed'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_SETUP_WRAPPER', 70, 'Transform substep executor wrapper script problem'))
+    _errorCodeList.append(trfExitCode('TRF_LOGFILE_FAIL', 71, 'Problem with substep executor logfile'))
+    _errorCodeList.append(trfExitCode('TRF_AMI_ERROR' , 72, 'Problem getting AMI tag info'))
+    _errorCodeList.append(trfExitCode('TRF_EXEC_RUNARGS_ERROR' , 73, 'Problem with executor runargs file'))
+    _errorCodeList.append(trfExitCode('TRF_INPUT_FILE_VALIDATION_FAIL' , 74, 'Input file failed validation'))
+    _errorCodeList.append(trfExitCode('TRF_OUTPUT_FILE_VALIDATION_FAIL' , 75, 'Output file failed validation'))
+    _errorCodeList.append(trfExitCode('TRF_UNEXPECTED_TRF_EXCEPTION', 250, 'Transform exception raised which the transform did not handle'))
+    _errorCodeList.append(trfExitCode('TRF_UNEXPECTED_OTHER_EXCEPTION', 251, 'General exception raised which the transform did not handle'))
+    _errorCodeList.append(trfExitCode('TRF_INTERNAL', 252, 'Internal transform error'))
+    _errorCodeList.append(trfExitCode('TRF_UNKOWN', 253, 'Unknown error code'))
+
+    # Add signaled exits
+    _errorCodeList.extend([trfExitCode('TRF_SIG_'+n, getattr(signal, n)+128, 'Transform received signal {0}'.format(n))
+                           for n in dir(signal) if n.startswith('SIG') and '_' not in n])
+
+    # Now map the entries to fast lookup dictionaries
+    _nameToCodeDict = dict()
+    _codeToNameDict = dict()
+    _nameToDescDict = dict()
+    for error in _errorCodeList:
+        _nameToCodeDict[error.acronymn] = error.numcode
+        _codeToNameDict[error.numcode] = error.acronymn
+        _nameToDescDict[error.acronymn] = error.description
+
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def nameToCode(name = None):
+        if name in trfExitCodes._nameToCodeDict:
+            return trfExitCodes._nameToCodeDict[name]
+        else:
+            msg.error('Could not map exit name %s to an exit code' % str(name))
+            return trfExitCodes._nameToCodeDict['TRF_UNKOWN']
+
+    @staticmethod
+    def codeToName(code = None):
+        if code in trfExitCodes._codeToNameDict:
+            return trfExitCodes._codeToNameDict[code]
+        else:
+            msg.error('Could not map exit code %s to an exit name' % str(code))
+            return 'TRF_UNKOWN'
+
+    @staticmethod
+    def nameToDesc(name = None):
+        if name in trfExitCodes._nameToDescDict:
+            return trfExitCodes._nameToDescDict[name]
+        else:
+            msg.error('Could not map exit name %s to a description' % str(name))
+            return 'No description available'
+
+
+trfExit = trfExitCodes()
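
As a usage illustration, the lookup helpers behave like a two-way map; the values here are taken from the table above:

from PyJobTransforms.trfExitCodes import trfExit

# Name <-> code <-> description round trips
assert trfExit.nameToCode('TRF_EXEC_TIMEOUT') == 67
assert trfExit.codeToName(65) == 'TRF_EXEC_FAIL'
assert trfExit.nameToDesc('TRF_SETUP') == 'Transform setup error'

diff --git a/Tools/PyJobTransforms/python/trfFileUtils.py b/Tools/PyJobTransforms/python/trfFileUtils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a818b7ece9bf9ee5341b8b07a1702a9eb9824422
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfFileUtils.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfFileUtils
+# @brief Transform utilities to deal with files.
+# @details Mainly used by argFile class.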
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfFileUtils.py 602496 2014-06-18 19:31:32Z graemes $
+# @todo make functions timelimited
+
+import unittest
+
+from subprocess import Popen,PIPE
+
+import logging
+msg = logging.getLogger(__name__)
+
+# @note Use the PyCmt forking decorator to ensure that ROOT is run completely within
+# a child process and will not 'pollute' the parent python process with unthread-safe
+# bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
+import PyUtils.Decorators as _decos
+
+from PyUtils.RootUtils import import_root
+from PyJobTransforms.trfDecorators import timelimited
+
+## @note The 'AODFixVersion' key can appear for AOD or ESD files
+athFileInterestingKeys = ['beam_energy', 'beam_type', 'conditions_tag', 'file_size',
+                          'file_guid', 'file_type', 'geometry', 'lumi_block', 'nentries', 'run_number',
+                          'AODFixVersion']
+
+
+
+## @brief Determines metadata of BS, POOL or TAG file.
+# @details Trivial wrapper around PyUtils.AthFile.
+# @param fileName Path(s) to the file for which the metadata are determined
+# @param retrieveKeys Keys to extract from the @c AthFile.infos dictionary
+# @return
+#  - Dictionary containing metadata of the file(s)
+#  - @c None if the determination failed.
+@timelimited()
+def AthenaFileInfo(fileNames, retrieveKeys = athFileInterestingKeys):
+    msg.debug('Calling AthenaFileInfo for {0}'.format(fileNames))
+
+    from PyUtils import AthFile
+    AthFile.server.flush_cache()
+    AthFile.server.disable_pers_cache()
+
+    if isinstance(fileNames, str):
+        fileNames = [fileNames,]
+
+    metaDict = {}
+    try:
+        ## @note This code is transitional, until all the versions of AthFile we
+        #  use support pfopen(). It should then be removed. Graeme, 2013-11-05.
+        #  Note to Future: Give it 6 months, then get rid of it!
+        if len(fileNames) > 1:
+            try:
+                athFile = AthFile.pfopen(fileNames)
+            except AttributeError:
+                msg.warning('This version of AthFile does not support "pfopen". Falling back to serial interface.')
+                athFile = AthFile.fopen(fileNames)
+        else:
+            athFile = AthFile.fopen(fileNames)
+        msg.debug('AthFile came back')
+        for fname, meta in zip(fileNames, athFile):
+            metaDict[fname] = {}
+            for key in retrieveKeys:
+                msg.debug('Looking for key {0}'.format(key))
+                try:
+                    # AODFix is tricky... it is absent in many files, but this is not an error
+                    if key == 'AODFixVersion':
+                        if 'tag_info' in meta.infos and isinstance(meta.infos['tag_info'], dict) and 'AODFixVersion' in meta.infos['tag_info']:
+                            metaDict[fname][key] = meta.infos['tag_info'][key]
+                        else:
+                            metaDict[fname][key] = ''
+                    # beam_type seems odd for RAW - typical values seem to be [1] instead of 'collisions' or 'cosmics'.
+                    # So we use the same scheme as AutoConfiguration does, mapping project names to known values.
+                    # It would be nice to import this all from AutoConfiguration, but there is no suitable method at the moment.
+                    # N.B. This is under discussion, so this code is a temporary fix (Captain's Log, Stardate 2012-11-28)
+                    elif key == 'beam_type':
+                        try:
+                            if isinstance(meta.infos[key], list) and len(meta.infos[key]) > 0 and meta.infos[key][0] in ('cosmics', 'singlebeam', 'collisions'):
+                                metaDict[fname][key] = meta.infos[key]
+                            else:
+                                from RecExConfig.AutoConfiguration import KnownCosmicsProjects, Known1BeamProjects, KnownCollisionsProjects, KnownHeavyIonProjects
+                                if 'bs_metadata' in meta.infos.keys() and isinstance(meta.infos['bs_metadata'], dict) and 'Project' in meta.infos['bs_metadata'].keys():
+                                    project = meta.infos['bs_metadata']['Project']
+                                elif 'tag_info' in meta.infos.keys() and isinstance(meta.infos['tag_info'], dict) and 'project_name' in meta.infos['tag_info'].keys():
+                                    project = meta.infos['tag_info']['project_name']
+                                else:
+                                    msg.info('AthFile beam_type was not a known value ({0}) and no project could be found for this file'.format(meta.infos[key]))
+                                    metaDict[fname][key] = meta.infos[key]
+                                    continue
+                                if project in KnownCollisionsProjects or project in KnownHeavyIonProjects:
+                                    metaDict[fname][key] = ['collisions']
+                                    continue
+                                if project in KnownCosmicsProjects:
+                                    metaDict[fname][key] = ['cosmics']
+                                    continue
+                                if project in Known1BeamProjects:
+                                    metaDict[fname][key] = ['singlebeam']
+                                    continue
+                                # Erm, so we don't know
+                                msg.info('AthFile beam_type was not a known value ({0}) and the file\'s project ({1}) did not map to a known beam type using AutoConfiguration'.format(meta.infos[key], project))
+                                metaDict[fname][key] = meta.infos[key]
+                        except Exception, e:
+                            msg.error('Got an exception while trying to determine beam_type: {0}'.format(e))
+                            metaDict[fname][key] = meta.infos[key]
+                    else:
+                        metaDict[fname][key] = meta.infos[key]
+                except KeyError:
+                    msg.warning('Missing key in athFile info: {0}'.format(key))
+            msg.debug('Found these metadata for {0}: {1}'.format(fname, metaDict[fname].keys()))
+        return metaDict
+    except ValueError, e:
+        msg.error('Problem in getting AthFile metadata for {0}'.format(fileNames))
+        return None
+
+
+## @brief Determines number of events in a HIST file.
+# @details Basically taken from PyJobTransformsCore.trfutil.MonitorHistFile
+# @param fileName Path to the HIST file.
+# @return
+#  - Number of events.
+#  - @c None if the determination failed.
+# @note Use the PyCmt forking decorator to ensure that ROOT is run completely within
+# a child process and will not 'pollute' the parent python process with unthread-safe
+# bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
+@_decos.forking
+def HISTEntries(fileName):
+
+    root = import_root()
+
+    file = root.TFile.Open(fileName, 'READ')
+
+    if not (isinstance(file, root.TFile) and file.IsOpen()):
+        return None
+
+    rundir = None
+    keys = file.GetListOfKeys()
+
+    for key in keys:
+
+        name = key.GetName()
+
+        if name.startswith('run_') and name != 'run_multiple':
+
+            if rundir is not None:
+                msg.warning('Found two run_ directories in HIST file %s: %s and %s' % ( fileName, rundir, name) )
+                return None
+            else:
+                rundir = name
+
+        del name
+
+    if rundir is None:
+        msg.warning( 'Unable to find run directory in HIST file %s' % fileName )
+        file.Close()
+        return None
+
+    msg.info( 'Using run directory %s for event counting of HIST file %s. ' % ( rundir, fileName ) )
+
+    hpath = '%s/GLOBAL/DQTDataFlow/events_lb' % rundir
+    possibleLBs = []
+    if 'tmp.HIST_' in fileName:
+        msg.info( 'Special case for temporary HIST file {0}. '.format( fileName ) )
+        h = file.Get('{0}'.format(rundir))
+        for directories in h.GetListOfKeys() :
+            if 'lb' in directories.GetName():
+                msg.info( 'Using {0} in tmp HIST file {1}. '.format(directories.GetName(), fileName ) )
+                hpath = rundir+'/'+str(directories.GetName())+'/GLOBAL/DQTDataFlow/events_lb'
+                possibleLBs.append(hpath)
+    else:
+        msg.info( 'Classical case for HIST file {0}. '.format( fileName ) )
+        possibleLBs.append(hpath)
+    nev = 0
+    if len(possibleLBs) == 0:
+        msg.warning( 'Unable to find events_lb histogram in HIST file %s' % fileName )
+        file.Close()
+        return None
+    for hpath in possibleLBs:
+        h = file.Get(hpath)
+
+        if not isinstance( h, root.TH1 ):
+            msg.warning( 'Unable to retrieve %s in HIST file %s.' % ( hpath, fileName ) )
+            file.Close()
+            return None
+
+        nBinsX = h.GetNbinsX()
+        nevLoc = 0
+
+        # Loop over all bins (ROOT numbers histogram bins 1..nBinsX inclusive)
+        for i in xrange(1, nBinsX+1):
+
+            if h[i] < 0:
+                msg.warning( 'Negative number of events for step %s in HIST file %s.' %( h.GetXaxis().GetBinLabel(i), fileName ) )
+                file.Close()
+                return None
+
+            elif h[i] == 0:
+                continue
+
+            if nevLoc == 0:
+                nevLoc = h[i]
+
+            else:
+                if nevLoc != h[i]:
+                    msg.warning( 'Mismatch in events per step in HIST file %s; most recent step seen is %s.' % ( fileName, h.GetXaxis().GetBinLabel(i) ) )
+                    file.Close()
+                    return None
+        nev += nevLoc
+    file.Close()
+    return nev
+
+
+
+## @brief Determines number of entries in NTUP file with given tree names.
+# @details Basically taken from PyJobTransformsCore.trfutil.ntup_entries.
+# @param fileName Path to the NTUP file.
+# @param treeNames Tree name or list of tree names.
+#        In the latter case it is checked if all trees contain the same number of events
+# @return
+#  - Number of entries.
+#  - @c None if the determination failed.
+# @note Use the PyCmt forking decorator to ensure that ROOT is run completely within
+# a child process and will not 'pollute' the parent python process with unthread-safe
+# bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
+@_decos.forking
+def NTUPEntries(fileName, treeNames):
+
+    if not isinstance( treeNames, list ):
+        treeNames=[treeNames]
+
+    root = import_root()
+
+    file = root.TFile.Open(fileName, 'READ')
+
+    if not (isinstance(file, root.TFile) and file.IsOpen()):
+        return None
+
+    prevNum=None
+    prevTree=None
+
+    for treeName in treeNames:
+
+        tree = file.Get(treeName)
+
+        if not isinstance(tree, root.TTree):
+            return None
+
+        num = tree.GetEntriesFast()
+
+        if num < 0:
+            msg.warning('GetEntriesFast returned a negative value for tree %s in NTUP file %s.' % ( treeName, fileName ))
+            return None
+
+        if prevNum is not None and prevNum != num:
+            msg.warning( "Found different number of entries in tree %s and tree %s of file %s." % ( treeName, prevTree, fileName ))
+            return None
+
+        numberOfEntries=num
+        prevNum = num
+        prevTree=treeName
+        del num
+        del tree
+
+    file.Close()
+
+    return numberOfEntries
+
+
+## @brief Get the size of a file via ROOT's TFile
+# @details Use TFile.Open to retrieve a ROOT filehandle, which will
+# deal with all non-posix filesystems. Return the GetSize() value.
+# The option filetype=raw is added to ensure this works for non-ROOT files too (e.g. BS)
+# @note Use the PyCmt forking decorator to ensure that ROOT is run completely within
+# a child process and will not 'pollute' the parent python process with unthread-safe
+# bits of code (otherwise strange hangs are observed on subsequent uses of ROOT)
+# @param filename Filename to get size of
+# @return fileSize or None if there was a problem
+@_decos.forking
+def ROOTGetSize(filename):
+    root = import_root()
+
+    try:
+        msg.debug('Calling TFile.Open for {0}'.format(filename))
+        file = root.TFile.Open(filename + '?filetype=raw', 'READ')
+        fsize = file.GetSize()
+        msg.debug('Got size {0} from TFile.GetSize'.format(fsize))
+    except ReferenceError:
+        msg.error('Failed to get size of {0}'.format(filename))
+        return None
+
+    file.Close()
+    del root
+    return fsize
+
+
+## @brief Return the LAN access type for a file URL
+# @param filename Name of file to examine
+# @return
+#  - String with LAN protocol
+def urlType(filename):
+    if filename.startswith('dcap:'):
+        return 'dcap'
+    if filename.startswith('root:'):
+        return 'root'
+    if filename.startswith('rfio:'):
+        return 'rfio'
+    if filename.startswith('file:'):
+        return 'posix'
+    return 'posix'
+
diff --git a/Tools/PyJobTransforms/python/trfFileValidationFunctions.py b/Tools/PyJobTransforms/python/trfFileValidationFunctions.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6e7d8ef8084c07cbce9964253f1cc26c51072f5
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfFileValidationFunctions.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfFileValidationFunctions
+# @brief Transform file validation functions
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfUtils.py 578615 2014-01-15 21:22:05Z wbreaden $
+
+import logging
+msg = logging.getLogger(__name__)
+
+import PyJobTransforms.trfExceptions as trfExceptions
+
+## @brief Integrity function for file class argPOOLFile, argHITSFile and argRDOFile
+def returnIntegrityOfPOOLFile(file):
+    from PyJobTransforms.trfValidateRootFile import checkFile
+    rc = checkFile(fileName = file, type = 'event', requireTree = True)
+    if rc == 0:
+        return (True, "integrity of {fileName} good".format(fileName = str(file)))
+    else:
+        return (False, "integrity of {fileName} bad: return code: {integrityStatus}".format(fileName = str(file), integrityStatus = rc))
+
+## @brief Integrity function for file class argNTUPFile
+def returnIntegrityOfNTUPFile(file):
+    from PyJobTransforms.trfValidateRootFile import checkFile
+    rc = checkFile(fileName = file, type = 'basket', requireTree = False)
+    if rc == 0:
+        return (True, "integrity of {fileName} good".format(fileName = str(file)))
+    else:
+        return (False, "integrity of {fileName} bad: return code: {integrityStatus}".format(fileName = str(file), integrityStatus = rc))
+
+## @brief Integrity function for file class argBSFile
+def returnIntegrityOfBSFile(file):
+    try:
+        from PyJobTransforms.trfUtils import call
+        rc = call(["AtlListBSEvents.exe", "-c", file],
+                  logger = msg,
+                  message = "Report by AtlListBSEvents.exe: ",
+                  timeout = None
+                  )
+    except trfExceptions.TransformTimeoutException:
+        return (False, "integrity check of {fileName} timed out".format(fileName = str(file)))
+    if rc == 0:
+        return (True, "integrity of {fileName} good".format(fileName = str(file)))
+    else:
+        return (False, "integrity of {fileName} bad: return code: {integrityStatus}".format(fileName = str(file), integrityStatus = rc))
+
+### @brief Integrity function for file class argTAGFile
+def returnIntegrityOfTAGFile(file):
+    from PyJobTransforms.trfFileUtils import AthenaFileInfo
+    dictionaryOfAthenaFileInfo = AthenaFileInfo([str(file),], retrieveKeys = ['nentries',])
+    msg.debug("dictionary of Athena file information: {a}".format(a = dictionaryOfAthenaFileInfo))
+    eventCount = dictionaryOfAthenaFileInfo[str(file)]['nentries']
+    if eventCount is None:
+        return (False, "integrity of {fileName} bad: got a bad event count in {fileName}: {eventCount}".format(fileName = str(file), eventCount = eventCount))
+    else:
+        return (True, "integrity of {fileName} good".format(fileName = str(file)))
+
+## @brief Integrity function for file class argHISTFile
+def returnIntegrityOfHISTFile(file):
+    from PyJobTransforms.trfValidateRootFile import checkFile
+    rc = 0 # (default behaviour)
+    if rc == 0:
+        return (True, "integrity of {fileName} good".format(fileName = str(file)))
+    else:
+        return (False, "integrity of {fileName} bad: return code: {integrityStatus}".format(fileName = str(file), integrityStatus = rc))
diff --git a/Tools/PyJobTransforms/python/trfGraph.py b/Tools/PyJobTransforms/python/trfGraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ea77fcc7a9817bcf7096e9568a8bff3d8748744
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfGraph.py
@@ -0,0 +1,586 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfGraph
+## @brief Transform graph utilities
+# @details Graph which represents transform executors (nodes) connected via data types (edges)
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfGraph.py 604265 2014-06-30 14:53:32Z graemes $
+# @note There are a few well established python graph implementations, but none seem to be in the ATLAS
+#  release (NetworkX, igraph). Our needs are so basic that we might well be able to just take a few well
+#  known routines and have them in this module. See, e.g., http://www.python.org/doc/essays/graphs.html
+# @note The basic idea is to have nodes representing athena jobs (sub-steps), with edges representing data.
+#  (It turns out this works, which is not true for the reverse representation when a job requires
+#  multiple inputs, e.g., DQHist merging needs the HIST_ESD and the HIST_AOD inputs, as edges can only
+#  connect 2 nodes; in contrast nodes can have an arbitrary number of edge connections.)
+#  The nodes have multiple input data types. Having multiple data types generally means that the
+#  node can execute if any of them are present (e.g., RDO or RAW). However, when multiple inputs
+#  are needed to execute, these are bound into a tuple.
+# @note We do not track one path through the graph - we track one path for each data type we need
+#  to produce and record which nodes get hit. Then each hit node is executed in order. We need to record
+#  which data objects are going to be produced ephemerally. One of the most important problems is that
+#  we are trying to minimise the cost of all the paths we need to take. To do this we start with
+#  data sorted in topological order (i.e., data which is produced earliest in the process first).
+#  Each path is traced back to the starting node and the cheapest is taken. Once nodes have been
+#  switched on for one data type they are assumed free for other data types.
+# @note We represent a dataless edge with the fake data inputs and outputs {in,out}NULL. These
+#  are used by the graph tracer, but then removed from the input/output of the actual substeps.
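
The topological sort referred to above is Kahn's algorithm. A minimal stand-alone sketch over a {node: set-of-successors} mapping (the function and variable names are invented for this example):

def toposort(succ):
    # Kahn's algorithm: repeatedly emit nodes with no remaining predecessors
    pred = dict((n, set()) for n in succ)
    for n, outs in succ.items():
        for m in outs:
            pred[m].add(n)
    ready = [n for n, ps in pred.items() if not ps]
    order = []
    while ready:
        n = ready.pop()
        order.append(n)
        for m in succ[n]:
            pred[m].discard(n)
            if not pred[m]:
                ready.append(m)
    if len(order) != len(succ):
        raise ValueError('graph has a cycle - not a DAG')
    return order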
+
+
+import copy
+import os
+import sys
+import unittest
+
+import logging
+msg = logging.getLogger(__name__)
+
+import PyJobTransforms.trfExceptions as trfExceptions
+
+from PyJobTransforms.trfExitCodes import trfExit
+
+
+
+
+## @brief Simple graph object describing the links between executors
+class executorGraph(object):
+
+    ## @brief Initialise executor graph
+    #  @param executorSet Set of executor instances
+    #  @param inputData Iterable with input data for this transform's execution
+    #  @param outputData Iterable with output data for this transform's execution
+    def __init__(self, executorSet, inputData = set([]), outputData = set([])):
+
+        # Set basic node list
+        self._nodeDict = {}
+
+        msg.debug('Graph input data: {0}; output data {1}'.format(inputData, outputData))
+
+        if len(executorSet) == 1:
+            # Single executor - in this case inData/outData is not mandatory, so we set them to the
+            # input/output data of the transform
+            executor = list(executorSet)[0]
+            if len(executor._inData) == 0 and len(executor._outData) == 0:
+                executor.inData = inputData
+                executor.outData = outputData
+
+        for executor in executorSet:
+            self.addNode(executor)
+
+        self._inputData = set(inputData)
+        self._outputData = set(outputData)
+
+        # It's forbidden for a transform to consume and produce the same datatype
+        dataOverlap = self._inputData & self._outputData
+        if len(dataOverlap) > 0:
+            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
+                                                        'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'.format(' '.join(dataOverlap)))
+
+        # Add pseudo start/stop nodes, from which input data flows and at which output data finally arrives.
+        # This makes the graph 'concrete' for this job.
+        # This is useful as then data edges all connect properly to a pair of nodes.
+        # We add a node for every possible output as this enables topo sorting of the graph
+        # nodes for any intermediate data end nodes as well.
+        pseudoNodes = dict()
+        pseudoNodes['_start'] = graphNode(name='_start', inData=[], outData=self._inputData, weight = 0)
+        for nodeName, node in self._nodeDict.iteritems():
+            for dataType in node.outputDataTypes:
+                endNodeName = '_end_{0}'.format(dataType)
+                pseudoNodes[endNodeName] = graphNode(name=endNodeName, inData=[dataType], outData=[], weight = 0)
+        self._nodeDict.update(pseudoNodes)
+
+        # Toposort not yet done
+        self._toposort = []
+        self._toposortData = []
+
+        # Now find connections between nodes
+        self.findConnections()
+
+    @property
+    def inputData(self):
+        return self._inputData
+
+    @inputData.setter
+    def inputData(self, inputData):
+        self._inputData = set(inputData)
+
+    @property
+    def outputData(self):
+        return self._outputData
+
+    @outputData.setter
+    def outputData(self, outputData):
+        self._outputData = set(outputData)
+
+    ## @brief Return a list of execution nodes with their data inputs/outputs
+    @property
+    def execution(self):
+        exeList = []
+        for nodeName in self._toposort:
+            # Start and end nodes are not real - they never actually execute
+            if nodeName.startswith(('_start', '_end')):
+                continue
+            if self._execution[nodeName]['enabled'] is True:
+                exeList.append({'name': nodeName, 'input': self._execution[nodeName]['input'],
+                                'output': self._execution[nodeName]['output']})
+        return exeList
+
+    ## @brief Return a list of all data used in this execution
+    @property
+    def data(self):
+        dataset = set()
+        for nodeName in self._toposort:
+            # Start and end nodes are not real - they never actually execute
+            if nodeName.startswith(('_start', '_end')):
+                continue
+            if self._execution[nodeName]['enabled'] is True:
+                dataset.update(self._execution[nodeName]['input'])
+                dataset.update(self._execution[nodeName]['output'])
+        return dataset
+
+    ## @brief Add an executor node to the graph
+    def addNode(self, executor):
+        self._nodeDict[executor.name] = executorNode(executor)
+
+
+    ## @brief Remove an executor node from the graph
+    def deleteNode(self, executor):
+        if executor.name in self._nodeDict:
+            del(self._nodeDict[executor.name])
+
+
+    def _resetConnections(self):
+        for node in self._nodeDict.itervalues():
+            node.resetConnections()
+
+    ## @brief Look at executor nodes and work out how they are connected
+    #  @note Anything better than n^2? Should be ok for our low numbers of nodes, but could be optimised
+    def findConnections(self):
+        self._resetConnections()
+        for nodeNameA, nodeA in self._nodeDict.iteritems():
+            for nodeNameB, nodeB in self._nodeDict.iteritems():
+                if nodeNameA == nodeNameB:
+                    continue
+                dataIntersection = list(set(nodeA.outputDataTypes) & set(nodeB.inputDataTypes))
+                msg.debug('Data connections between {0} and {1}: {2}'.format(nodeNameA, nodeNameB, dataIntersection))
+                if len(dataIntersection) > 0:
+                    nodeA.addConnection(nodeNameB, dataIntersection, type='out')
+                    nodeB.addConnection(nodeNameA, dataIntersection, type='in')
+
+        msg.debug('Graph connections are: \n{0}'.format(self))
+
+    ## @brief Find a topologically sorted list of the graph nodes
+    #  @note If this is not possible, the graph is not a DAG - not supported
+    #  @note See http://en.wikipedia.org/wiki/Topological_sorting
+    def doToposort(self):
+        # We will manipulate the graph, so deepcopy it
+        graphCopy = copy.deepcopy(self._nodeDict)
+        # Find all valid start nodes in this graph - ones with no data dependencies themselves
+        startNodeNames = []
+        for nodeName, node in graphCopy.iteritems():
+            if len(node.connections['in']) == 0:
+                startNodeNames.append(nodeName)
+
+        if len(startNodeNames) == 0:
+            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
+                                                        'There are no starting nodes in this graph - non-DAG graphs are not supported')
+
+        msg.debug('Found this list of start nodes for toposort: {0}'.format(startNodeNames))
+
+        # The startNodeNames list holds the nodes with their dependencies now satisfied (no input edges anymore)
+        while len(startNodeNames) > 0:
+            # Take the next startNodeName and zap it from the graph
+            theNodeName = startNodeNames.pop()
+            theNode = graphCopy[theNodeName]
+            self._toposort.append(theNodeName)
+            del graphCopy[theNodeName]
+
+            # Now delete the edges this node was a source for
+            msg.debug('Considering connections from node {0}'.format(theNodeName))
+            for connectedNodeName in theNode.connections['out']:
+                graphCopy[connectedNodeName].delConnection(toExe = theNodeName, type = 'in')
+                # Look for nodes which now have their dependencies satisfied
+                if len(graphCopy[connectedNodeName].connections['in']) == 0:
+                    startNodeNames.append(connectedNodeName)
+
+        # If there are nodes left then the graph has cycles, which means it's not a DAG
+        if len(graphCopy) > 0:
+            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
+                                                        'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'.format(graphCopy.keys()))
+
+        msg.debug('Topologically sorted node order: {0}'.format(self._toposort))
+
+        # Now toposort the input data for nodes
+        self._toposortData = []
+        for nodeName in self._toposort:
+            # First add input data, then output data
input data, then output data
+            for dataType in self._nodeDict[nodeName].inputDataTypes:
+                if dataType not in self._toposortData:
+                    self._toposortData.append(dataType)
+            for dataType in self._nodeDict[nodeName].outputDataTypes:
+                if dataType not in self._toposortData:
+                    self._toposortData.append(dataType)
+
+        msg.debug('Topologically sorted data order: {0}'.format(self._toposortData))
+
+
+    ## @brief Find the graph's execution nodes, from input to output data types,
+    #  with each activated step and the inputs/outputs
+    #  @param @c outputDataTypes Data to produce
+    #  @param @c inputDataTypes Data available as inputs
+    def findExecutionPath(self):
+        # Switch off all nodes
+        self._execution = {}
+        for nodeName, node in self._nodeDict.iteritems():
+            if node.inputDataTypes == set() and node.outputDataTypes == set():
+                # Any nodes which have no data dependencies cannot be data driven, so we assume
+                # that they always execute
+                self._execution[nodeName] = {'enabled' : True, 'input' : set(), 'output' : set()}
+            else:
+                self._execution[nodeName] = {'enabled' : False, 'input' : set(), 'output' : set()}
+
+        dataToProduce = copy.deepcopy(self._outputData)
+        dataAvailable = copy.deepcopy(self._inputData)
+
+        # Consider the next data type in topo order
+        while len(dataToProduce) > 0:
+            nextDataType = None
+            for dataType in self._toposortData:
+                if dataType in dataToProduce:
+                    nextDataType = dataType
+                    dataToProduce.remove(nextDataType)
+                    dataAvailable.update([nextDataType])
+                    break
+
+            if not nextDataType:
+                msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).'
+                          ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
+                raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
+                                                            'Data type graph error')
+
+            msg.debug('Next data type to try is {0}'.format(nextDataType))
+            bestPath = self._bestPath(nextDataType, dataAvailable)
+
+            msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))
+
+            ## @note Use @c modPath to construct an array which we iterate over in pairs of (currentNode, nextNode)
+            modPath = bestPath.path + [None]
+            for (nodeName, nextNodeName) in [ (n, modPath[modPath.index(n)+1]) for n in bestPath.path ]:
+                self._execution[nodeName]['enabled'] = True
+                # Add the necessary data types to the output of the first node and the input of the next
+                if nodeName in bestPath.newData:
+                    self._execution[nodeName]['output'].update(bestPath.newData[nodeName])
+                    for newData in bestPath.newData[nodeName]:
+                        if newData not in dataAvailable:
+                            dataToProduce.update([newData])
+                    if nextNodeName:
+                        self._execution[nextNodeName]['input'].update(bestPath.newData[nodeName])
+                        if nextNodeName in bestPath.extraData:
+                            self._execution[nextNodeName]['input'].update(bestPath.extraData[nodeName])
+            # Add any extra data we need (from multi-exit nodes) to the data to produce list
+            for extraNodeData in bestPath.extraData.itervalues():
+                for extra in extraNodeData:
+                    if extra not in dataAvailable:
+                        dataToProduce.update([extra])
+
+        # Now remove the fake data objects from activated nodes
+        for node, props in self._execution.iteritems():
+            msg.debug('Removing fake data from node {0}'.format(node))
+            props['input'] -= set(['inNULL', 'outNULL'])
+            props['output'] -= set(['inNULL', 'outNULL'])
+
+        msg.debug('Execution dictionary: {0}'.format(self._execution))
+
+
+    ## @brief Find the best path from an end to a start node, producing a certain type of data
+    #  given the set of currently
available data and the current set of activated nodes + # @param data Data to produce + # @param dataAvailable Data types which can be used as sources + # @param startNodeName Find the path to this node (default '_start') + # @param endNodeName Find the path from this node (default '_end_DATATYPE') + # @details We can always ask the algorithm to trace the part from end to start + # for this data type (this data is in endnode by construction). If we have to go + # along an edge where the data is not yet available then we need to add this data to + # our list of data to produce. + def _bestPath(self, data, dataAvailable, startNodeName = '_start', endNodeName = None): + + if endNodeName is None: + endNodeName = '_end_{0}'.format(data) + + # Set of all considered paths + # Initialise this with our endNode name - algorithm works back to the start + pathSet = [graphPath(endNodeName, data),] + + msg.debug('Started path finding with seed path {0}'.format(pathSet[0])) + + # Halting condition - only one path and its first element is startNodeName + while len(pathSet) > 1 or pathSet[0].path[0] is not startNodeName: + msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet)) + # Copy the pathSet to do this, as we will update it + for path in pathSet[:]: + msg.debug('Continuing path finding with path {0}'.format(path)) + currentNodeName = path.path[0] + if currentNodeName is startNodeName: + msg.debug('Path {0} has reached the start node - finished'.format(path)) + continue + # If there are no paths out of this node then it's a dead end - kill it + if len(self._nodeDict[currentNodeName].connections['in']) is 0: + msg.debug('Path {0} is a dead end - removing'.format(path)) + pathSet.remove(path) + continue + # If there is only one path out of this node, we extend it + if len(self._nodeDict[currentNodeName].connections['in']) is 1: + msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, self._nodeDict[currentNodeName].connections['in'].keys()[0])) + self._extendPath(path, currentNodeName, self._nodeDict[currentNodeName].connections['in'].keys()[0]) + continue + # Else we need to clone the path for each possible exit + msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path])) + for nextNodeName in self._nodeDict[currentNodeName].connections['in'].keys()[1:]: + newPath = copy.deepcopy(path) + msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName)) + self._extendPath(newPath, currentNodeName, nextNodeName) + pathSet.append(newPath) + # Finally, use the original path to extend along the first node exit + msg.debug('Adding exit from original path {0} to {1}'.format(path, self._nodeDict[currentNodeName].connections['in'].keys()[0])) + self._extendPath(path, currentNodeName, self._nodeDict[currentNodeName].connections['in'].keys()[0]) + + # Now compare paths which made it to the end - only keep the shortest + lowestCostPath = None + for path in pathSet[:]: + currentNodeName = path.path[0] + if currentNodeName is startNodeName: + if lowestCostPath is None: + lowestCostPath = path + continue + if path.cost >= lowestCostPath.cost: + msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath)) + pathSet.remove(path) + else: + msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath)) + pathSet.remove(lowestCostPath) + lowestCostPath = path + + # Emergency break + if len(pathSet) == 0: + raise 
trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), + 'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data)) + return pathSet[0] + + + ## @brief Connect a path to a particular node + # @param path graphPath instance + # @param nextNodeName Node to connect to + def _extendPath(self, path, currentNodeName, nextNodeName): + nextNode = self._nodeDict[nextNodeName] + edgeData = self._nodeDict[currentNodeName].connections['in'][nextNodeName] + msg.debug('Connecting {0} to {1} with data {2}'.format(currentNodeName, nextNodeName, edgeData)) + + extraData = set() + if self._execution[currentNodeName]['enabled'] is True: + extraCost = 0 + else: + for edgeDataElement in edgeData: + # Simple case - one data connection only + if edgeDataElement in self._nodeDict[currentNodeName].inData: + extraCost = self._nodeDict[currentNodeName].weights[edgeDataElement] + else: + # Complex case - the start requirement for this node must be multi-data + # Only the first match in the dataIn lists is considered + # This will break if there are multiple overlapping dataIn requirements + for nodeStartData in self._nodeDict[currentNodeName].inData: + if isinstance(nodeStartData, (list, tuple)) and edgeDataElement in nodeStartData: + extraCost = self._nodeDict[currentNodeName].weights[nodeStartData] + msg.debug('Found multi-data exit from {0} to {1} - adding {2} to data requirements'.format(currentNodeName, nextNodeName, nodeStartData)) + extraData.update(nodeStartData) + break + # Remove data which is on the edge itself + extraData.difference_update(edgeData) + + msg.debug('Updating path {0} with {1}, {2}, {3}, {4}'.format(path, nextNodeName, edgeData, extraData, extraCost)) + path.addToPath(nextNodeName, edgeData, extraData, extraCost) + + + ## @brief Nodes in topologically sorted order, if available, else sorted name order + def __str__(self): + nodeStrList = [] + if len(self._toposort) > 0: + nodeNames = self._toposort + else: + nodeNames = self._nodeDict.keys() + nodeNames.sort() + for nodeName in nodeNames: + if not nodeName.startswith('_'): + nodeStrList.append(str(self._nodeDict[nodeName])) + return os.linesep.join(nodeStrList) + + + ## @brief Nodes in topologically sorted order, if available, else sorted name order + def __repr__(self): + nodeStrList = [] + if len(self._toposort) > 0: + nodeNames = self._toposort + else: + nodeNames = self._nodeDict.keys() + nodeNames.sort() + for nodeName in nodeNames: + nodeStrList.append(repr(self._nodeDict[nodeName])) + return os.linesep.join(nodeStrList) + + +## @brief Vanilla graph node +class graphNode(object): + + ## @brief Graph node constructor + # @param name Name of this node + # @param indata Iterable containing input data connections for this node + # @param outdata Iterable containing output data connections for this node + # @param weight Weights (relative execution cost) for each input connection to this node + # @note For @c inData and @c outData a list, tuple or set is acceptable. Multiple input data types + # should be expressed as lists or tuples themselves, e.g., <tt>[('HIST_AOD', 'HIST_ESD')]</tt>. They + # cannot be sets themselves as python sets cannot contain other sets. + def __init__(self, name, inData, outData, weight = None): + self._name = name + self._inData = set(inData) + self._outData = set(outData) + + ## @note @c _inWeights takes the form of a dictionary, keyed by input data type + # and giving the relative cost of executing this node with those input data types. 
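+        #  As an illustration (hypothetical nodes), weight may be a single int
+        #  applied to every input, or a dict keyed exactly by the inData
+        #  elements, including any multi-input tuples:
+        #      graphNode('ESDtoAOD', inData=['ESD'], outData=['AOD'], weight=10)
+        #      graphNode('HISTmerge', inData=[('HIST_ESD', 'HIST_AOD')], outData=['HIST'],
+        #                weight={('HIST_ESD', 'HIST_AOD'): 1})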
+ self._inWeights = {} + if weight is None: + for data in self._inData: + self._inWeights[data] = 1 + elif isinstance(weight, int): + for data in self._inData: + self._inWeights[data] = weight + else: + # Must be a dictionary with its keys equal to the _inData elements + self._inWeights = weight + + self._inputDataTypes = self._flattenSet(self._inData) + self._outputDataTypes = self._flattenSet(self._outData) + + # Connections dictionary will hold incoming and outgoing edges - the incoming connections + # are very useful for topological ordering. Nested dictionary with 'in', 'out' keys, where + # the values are dictionaries with nodeName keys and set(dataTypes) as values. + # e.g., {'out': {'_end_HIST': set(['HIST'])}, 'in': {'ESDtoAOD': set(['HIST_AOD']), 'RAWtoESD': set(['HIST_ESD'])}} + self._connections = {'in': {}, 'out': {}} + + @property + def name(self): + return self._name + + @property + def inData(self): + return self._inData + + @property + def outData(self): + return self._outData + + @property + def inputDataTypes(self): + return self._flattenSet(self.inData) + + @property + def outputDataTypes(self): + return self._flattenSet(self._outData) + + @property + def connections(self): + return self._connections + + @property + def weights(self): + return self._inWeights + + ## @brief Add a new edge connection for this node + # @param @c toExe Other node for this edge + # @param @c data Data which connects these nodes (iterable), converted to set object + # @param @c type If this is an incoming or outgoing edge for this node + def addConnection(self, toExe, data, type = 'out'): + self._connections[type][toExe] = set(data) + + ## @brief Delete a connection from this node + # @param @c toExe Other node for this vertex + # @param @c type If this is an incoming or outgoing edge for this node + def delConnection(self, toExe, type = 'out'): + del self._connections[type][toExe] + + ## @brief Delete all connections + def resetConnections(self): + self._connections = {'in': {}, 'out': {}} + + ## @brief Take a list and return all simple members plus the members of any list/tuples in the set (i.e., + # flatten out multiple input tuples) + def _flattenSet(self, startSet): + flatData = set() + for data in startSet: + if isinstance(data, (list, tuple)): + flatData.update(data) + else: + flatData.update([data]) + return flatData + + def __str__(self): + return '{0} (dataIn {1} -> dataOut {2})'.format(self._name, self._inData, self._outData) + + def __repr__(self): + return '{0} (dataIn {1}, weights {2}; dataOut {3}; connect {4})'.format(self._name, self._inData, self._inWeights, self._outData, self._connections) + + +## @brief Initialise a graph node from an executor +class executorNode(graphNode): + ## @brief executorNode constructor + # @note This is just a wrapper to call the parent class's constructor with the extracted + # input and output data types + # @param executor Executor used to make this node + # @param weight Weight specifier + def __init__(self, executor = None, weight = None): + super(executorNode, self).__init__(executor.name, executor.inData, executor.outData, weight) + +## @brief Path object holding a list of nodes and data types which trace a single path through the graph +class graphPath(object): + + ## @brief graphPath constructor + # @param endNodeName The node name where this path will end + # @param data The data type which this path will make + # @param cost The starting cost of this path (default 0) + def __init__(self, endNodeName, data, cost = 0): + self._path = 
[endNodeName] + self._data = data + self._cost = cost + + ## @note These are dictionaries, keyed by substep, with the data which needs to be produced in that substep, i.e., + # it is the output data for that step and the input data for the downstream step. To avoid data duplication + # the values are sets. @c extraData gives data which is not made in this path itself, but is needed in order + # to satisfy multi-data entry criteria. @c newData is data which is produced along this path. + self._newData = dict() + self._extraData = dict() + + @property + def path(self): + return self._path + + @property + def cost(self): + return self._cost + + @property + def newData(self): + return self._newData + + @property + def extraData(self): + return self._extraData + + def addToPath(self, newNodeName, newData = set(), extraData = set(), extraCost = 0): + self._path.insert(0, newNodeName) + self._newData[newNodeName] = newData + self._cost += extraCost + self._extraData[newNodeName] = extraData + + def addCost(self, cost): + self._cost += cost + + def __str__(self): + return '{0}: path {1}; cost {2}, newData {3}, extraData {4}'.format(self._data, self._path, self._cost, self._newData, self._extraData) + diff --git a/Tools/PyJobTransforms/python/trfJobOptions.py b/Tools/PyJobTransforms/python/trfJobOptions.py new file mode 100644 index 0000000000000000000000000000000000000000..0359293ea5ff3195b34d9d625e60ed6781b31e66 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfJobOptions.py @@ -0,0 +1,214 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfJobOptions +# +# @brief Contains functions related Athena Job Options files +# @details Generates runArgs JobOptions and interfaces with skeleton +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfJobOptions.py 575444 2013-12-13 14:12:10Z graemes $ +# + +import os +import string +import time +import unittest + +import logging +msg = logging.getLogger(__name__) + +import PyJobTransforms.trfArgClasses as trfArgClasses +import PyJobTransforms.trfExceptions as trfExceptions +from PyJobTransforms.trfExitCodes import trfExit + +from PyJobTransforms.trfUtils import findFile + + +## @class RunArguments +## @brief Hold run arguments as name-value pairs +class RunArguments(object): + """Dynamic class that holds the run arguments as named members with values.""" + def __str__(self): + myself = 'RunArguments:' + for arg in dir(self): + if not arg.startswith('__'): + myself += '%s %s = %s' % (os.linesep, arg, repr(getattr(self, arg))) + return myself + + +## @class JobOptionsTemplate +# @brief Class that generates the job options (AKA runargs) python file for an athena executor +class JobOptionsTemplate(object): + """For generating runArgs JobOptions file """ + + ## @brief Initialise the job options template class + # @param exe Associated athena executor + # @param version Optional version string + # @param runArgsName Name of runtime argument class + # @note Almost all useful parameters for this class are part of the executor itself + def __init__(self, exe, version=None, runArgsName='runArgs'): + self._exe = exe + self._version = version + self._runArgsName = runArgsName + self._runArgsFile = 'runargs.' 
+ self._exe.name + '.py' + + + ## @brief Write the runArgs Job Options file + def writeRunArgs(self, input = dict(), output = dict()): + msg.info('Writing runArgs to file \"%s\"' % self._runArgsFile) + + with open(self._runArgsFile, 'w') as runargsFile: + try: + # First write a little header + print >>runargsFile, os.linesep.join(("# Run arguments file auto-generated on {0} by:".format(time.asctime()), + "# JobTransform: {0}".format(self._exe.name), + "# Version: {0}".format(self._version) + )) + + # Now make sure we import the runArgs class for out job options + print >>runargsFile, os.linesep.join(("# Import runArgs class", + "from PyJobTransforms.trfJobOptions import RunArguments", + "{0} = RunArguments()".format(self._runArgsName) + )) + + # Handy to write the substep name here as it can be used as (part of) a random seed + # in some cases + print >>runargsFile, '{0}.trfSubstepName = {1!r}'.format(self._runArgsName, self._exe.name), os.linesep + + # Now loop over the core argdict and see what needs to be given as a runArg + declaredRunargs = [] + for k, v in self._exe.conf.argdict.iteritems(): + # Check if this arg is supposed to be in runArgs + if isinstance(v, trfArgClasses.argument) and v.isRunarg: + # Files handled later + if isinstance(v, trfArgClasses.argFile): + continue + + msg.debug('Argument {0} is a runarg, will be added to JO file (value {1})'.format(k, v.value)) + + ## @note Substep type arguments are rather special, they apply to only named + # executors or substeps. We use the returnMyValue() method to sort out what + # specific value applies to us + if isinstance(v, trfArgClasses.argSubstep): + myValue = v.returnMyValue(exe = self._exe) + if myValue is not None: + print >>runargsFile, "{0}.{1!s} = {2!r}".format(self._runArgsName, k, myValue) + msg.debug('Added substep type argument {0} as: {1}'.format(k, myValue)) + declaredRunargs.append(k) + else: + print >>runargsFile, "{0}.{1!s} = {2!r}".format(self._runArgsName, k, v.value) + declaredRunargs.append(k) + else: + msg.debug('Argument {0} is not a runarg - ignored'.format(k)) + + # Now make sure that if we did not add maxEvents then we set this to -1, which + # avoids some strange defaults that only allow 5 events to be processed + if 'maxEvents' not in declaredRunargs: + print >>runargsFile, os.linesep.join(("", "# Explicitly added to process all events in this step", + "{0}.maxEvents = -1".format(self._runArgsName), + )) + + # Now deal with our input and output files + print >>runargsFile, os.linesep, "# Input data" + for dataType, dataArg in input.iteritems(): + print >>runargsFile, '{0}.input{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value) + print >>runargsFile, '{0}.input{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type) + # Add the input event count, if we know it + if dataArg.isCached(metadataKeys = ['nentries']): + print >>runargsFile, '{0}.input{1}FileNentries = {2!r}'.format(self._runArgsName, dataType, dataArg.nentries) + + print >>runargsFile, os.linesep, "# Output data" + for dataType, dataArg in output.iteritems(): + # Need to be careful to convert _output_ filename as a strings, not a list + print >>runargsFile, '{0}.output{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value[0]) + print >>runargsFile, '{0}.output{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type) + + + # Process all of the tweaky special runtime arguments + print >>runargsFile, os.linesep, "# Extra runargs" + ## @note extraRunargs are passed using repr, i.e., they 
should be constants + for k, v in self._exe._extraRunargs.iteritems(): + ## @note: What to do if this is a CLI argument as well, in particular + # for arguments like preExec we want to add to the list, not replace it + if k in declaredRunargs: + if isinstance(self._exe.conf.argdict[k].value, list): + msg.debug('Extending runarg {0!s}={1!r}'.format(k, v)) + print >>runargsFile, '{0}.{1!s}.extend({2!r})'.format(self._runArgsName, k, v) + else: + msg.debug('Adding runarg {0!s}={1!r}'.format(k, v)) + print >>runargsFile, '{0}.{1!s} = {2!r}'.format(self._runArgsName, k, v) + + ## @note runtime runargs are passed as strings, i.e., they can be evaluated + print >>runargsFile, os.linesep, '# Extra runtime runargs' + for k, v in self._exe._runtimeRunargs.iteritems(): + # These options are string converted, not repred, so they can write an option + # which is evaluated at runtime + # Protect this with try: except: for the Embedding use case + msg.debug('Adding runarg {0!s}={1!r}'.format(k, v)) + print >>runargsFile, os.linesep.join(('try:', + ' {0}.{1!s} = {2!s}'.format(self._runArgsName, k, v), + 'except AttributeError:', + ' print "WARNING - AttributeError for {0}"'.format(k))) + + ## @note Now write the literals into the runargs file + if self._exe._literalRunargs is not None: + print >>runargsFile, os.linesep, '# Literal runargs snippets' + for line in self._exe._literalRunargs: + print >>runargsFile, line + + ## Another special option - dataArgs are always written to the runargs file + for dataType in self._exe._dataArgs: + print >>runargsFile, os.linesep, '# Forced data value arguments' + if dataType in self._exe.conf.dataDictionary: + print >>runargsFile, '{0}.data{1}arg = {2!r}'.format(self._runArgsName, dataType, + self._exe.conf.dataDictionary[dataType].value) + else: + print >>runargsFile, '# Warning: data type "{0}" is not part of this transform'.format(dataType) + + # This adds the correct JO fragment for AthenaMP job, where we need to ask + # the FileMgr to produce the requested log and report files + # Also, aggregating the workers' logfiles into the mother's makes life + # easier for debugging + if self._exe._athenaMP: + print >>runargsFile, os.linesep, '# AthenaMP Options' + # Proxy for both options + print >>runargsFile, os.linesep.join((os.linesep, + 'from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps', + 'AthenaMPJobProps.AthenaMPFlags.WorkerTopDir="{0}"'.format(self._exe._athenaMPWorkerTopDir), + 'AthenaMPJobProps.AthenaMPFlags.OutputReportFile="{0}"'.format(self._exe._athenaMPFileReport), + 'AthenaMPJobProps.AthenaMPFlags.CollectSubprocessLogs=True' + )) + msg.info('Successfully wrote runargs file {0}'.format(self._runArgsFile)) + + except (IOError, OSError) as e: + errMsg = 'Got an error when writing JO template {0}: {1}'.format(self._runArgsFile, e) + msg.error(errMsg) + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg) + + + + ## @brief Make sure skeleton file is available + def ensureJobOptions(self): + # Check the runArgs: + if self._runArgsFile is None: + msg.warning('No runArgs available') + + if not findFile(os.environ["JOBOPTSEARCHPATH"], self._runArgsFile): + msg.warning('Could not find runArgs file %s' % self._runArgsFile) + + # Check the skeleton(s): + for skeleton in self._exe._skeleton: + if not findFile(os.environ["JOBOPTSEARCHPATH"], skeleton): + msg.warning('Could not find job options skeleton file %s' % skeleton) + + + ## @brief Get the runArgs and skeleton joboptions, Master function + # 
@param input Input file list + # @param output Output file list + # @return List of runargs and skeletons to be processed by athena + def getTopOptions(self, input = dict(), output = dict()): + # First Make the runArgs file: + self.writeRunArgs(input = input, output = output) + # Make sure runArgs and skeleton are valid + self.ensureJobOptions() + return [ self._runArgsFile ] + self._exe._skeleton + diff --git a/Tools/PyJobTransforms/python/trfLogger.py b/Tools/PyJobTransforms/python/trfLogger.py new file mode 100644 index 0000000000000000000000000000000000000000..94ccf54bfd4a2376b5b3f25b2b33249fec0ad7ad --- /dev/null +++ b/Tools/PyJobTransforms/python/trfLogger.py @@ -0,0 +1,56 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfLogger +# +# @brief Logging configuration for ATLAS job transforms +# @details Use the plain python logging module for transform logging. +# There is no real benefit to AthenaCommon.Logging for transforms. +# Root logger is set as msg and should be imported to the top level +# transform. If a logging configuration file set in the ENVVAR \c TRF_LOGCONF +# exists this is used for configuring the transform logger. Otherwise a +# standard setup is used. +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfLogger.py 573315 2013-12-02 15:45:55Z graemes $ + +import logging +import logging.config +import os +import sys +import unittest + +## base logger +# Note we do not setup a root logger, as this has nasty interactions with the PyUtils +# root logger (double copies of messages). Instead we log in the transform module space. +msg = logging.getLogger('PyJobTransforms') + +## Map strings to standard logging levels +# FATAL is the same level as CRITICAL (used for parsing athena logfiles); ditto for VERBOSE==DEBUG +stdLogLevels = {'DEBUG' : logging.DEBUG, + 'VERBOSE' : logging.DEBUG, + 'INFO' : logging.INFO, + 'WARNING' : logging.WARNING, + 'ERROR' : logging.ERROR, + 'CRITICAL' : logging.CRITICAL, + 'FATAL' : logging.CRITICAL, + } + +## This is the correct order to put the most serious stuff first +stdLogLevelsByCritcality = ['FATAL', 'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'VERBOSE', 'DEBUG'] + +# If TRF_LOGCONF is defined then try to use that file +# for logging setup +if 'TRF_LOGCONF' in os.environ and os.access(os.environ['TRF_LOGCONF'], os.R_OK): + logging.config.fileConfig(os.environ['TRF_LOGCONF']) +else: + # Otherwise use a standard logging configuration + hdlr = logging.StreamHandler(sys.stdout) + # asctime seems too verbose...? 
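+    # For reference, a minimal TRF_LOGCONF file in the standard
+    # logging.config.fileConfig() INI format might look like this (a sketch,
+    # assuming a single console handler):
+    #     [loggers]
+    #     keys=root,PyJobTransforms
+    #     [handlers]
+    #     keys=console
+    #     [formatters]
+    #     keys=plain
+    #     [logger_root]
+    #     level=WARNING
+    #     handlers=console
+    #     [logger_PyJobTransforms]
+    #     level=INFO
+    #     handlers=console
+    #     qualname=PyJobTransforms
+    #     propagate=0
+    #     [handler_console]
+    #     class=StreamHandler
+    #     args=(sys.stdout,)
+    #     formatter=plain
+    #     [formatter_plain]
+    #     format=%(name)s.%(funcName)s %(levelname)s %(message)s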
+    frmt = logging.Formatter("%(name)s.%(funcName)s %(asctime)s %(levelname)s %(message)s")
+#    frmt = logging.Formatter("Trf:%(name)s.%(funcName)s %(levelname)s %(message)s")
+    hdlr.setFormatter(frmt)
+    msg.addHandler(hdlr)
+    msg.setLevel(logging.INFO)
+
+## Change the logging level of the root logger
+def setRootLoggerLevel(level):
+    msg.setLevel(level)
diff --git a/Tools/PyJobTransforms/python/trfReports.py b/Tools/PyJobTransforms/python/trfReports.py
new file mode 100644
index 0000000000000000000000000000000000000000..2466f97abde2a998856bf805cf5de39965dd19c7
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfReports.py
@@ -0,0 +1,742 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfReports
+#
+# @brief Transform report classes and helper functions
+# @details Classes whose instance encapsulates transform reports
+#  at different levels, such as file, executor, transform
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfReports.py 614626 2014-09-02 14:22:48Z volkmer $
+#
+
+__version__ = '$Revision: 614626 $'
+__doc__ = 'Transform report module'
+
+import cPickle as pickle
+import json
+import os.path
+import platform
+import pprint
+import sys
+
+from copy import deepcopy
+from xml.etree import ElementTree
+
+import logging
+msg = logging.getLogger(__name__)
+
+import PyJobTransforms.trfExceptions as trfExceptions
+import PyJobTransforms.trfArgClasses as trfArgClasses
+
+from PyJobTransforms.trfExitCodes import trfExit
+from PyJobTransforms.trfUtils import shQuoteStrings, isodate, prettyXML
+
+## @brief Default values for file reporting
+defaultFileReport = {'input': 'name', 'temporary': None, 'output': 'full'}
+
+## @brief Base (almost virtual) report from which all real transform reports derive
+class trfReport(object):
+    def __init__(self):
+        pass
+
+    ## @brief String representation of the job report
+    #  @details Uses pprint module to output the python object as text
+    #  @note This is a 'property', so no @c fast option is available
+    def __str__(self):
+        return pprint.pformat(self.python())
+
+    ## @brief Method which returns a python representation of a report
+    #  @param fast Boolean which forces the fastest possible report to be written
+    def python(self, fast = False, fileReport = defaultFileReport):
+        return {}
+
+    ## @brief Method which returns a JSON representation of a report
+    #  @param fast Boolean which forces the fastest possible report to be written
+    #  @details Calls @c json.dumps on the python representation
+    def json(self, fast = False):
+        return json.dumps(self.python(fast = fast))
+
+    ## @brief Method which returns an ElementTree.Element representation of the old POOLFILECATALOG report
+    #  @param fast Boolean which forces the fastest possible report to be written
+    def classicEltree(self, fast = False):
+        return ElementTree.Element('POOLFILECATALOG')
+
+    ## @brief Method which returns a python representation of a report in classic Tier 0 style
+    #  @param fast Boolean which forces the fastest possible report to be written
+    def classicPython(self, fast = False):
+        return {}
+
+    def writeJSONReport(self, filename, sort_keys = True, indent = 2, fast = False,
+                        fileReport = defaultFileReport):
+        with open(filename, 'w') as report:
+            try:
+                json.dump(self.python(fast = fast, fileReport = fileReport), report, sort_keys = sort_keys, indent = indent)
+            except TypeError as e:
+                # TypeError means we had an unserialisable object - re-raise as a trf internal
+                message = 'TypeError raised during JSON report 
output: {0!s}'.format(e) + msg.error(message) + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), message) + + def writeTxtReport(self, filename, dumpEnv = True, fast = False, fileReport = defaultFileReport): + with open(filename, 'w') as report: + print >> report, '# {0} file generated on'.format(self.__class__.__name__), isodate() + print >> report, pprint.pformat(self.python(fast = fast, fileReport = fileReport)) + if dumpEnv: + print >> report, '# Environment dump' + eKeys = os.environ.keys() + eKeys.sort() + for k in eKeys: + print >> report, '%s=%s' % (k, os.environ[k]) + print >> report, '# Machine report' + print >> report, pprint.pformat(machineReport().python(fast = fast)) + + def writeGPickleReport(self, filename, fast = False): + with open(filename, 'w') as report: + pickle.dump(self.classicPython(fast = fast), report) + + def writeClassicXMLReport(self, filename, fast = False): + with open(filename, 'w') as report: + print >> report, prettyXML(self.classicEltree(fast = fast), poolFileCatalogFormat = True) + + def writePilotPickleReport(self, filename, fast = False, fileReport = defaultFileReport): + with open(filename, 'w') as report: + pickle.dump(self.python(fast = fast, fileReport = fileReport), report) + + +## @brief Class holding a transform job report +class trfJobReport(trfReport): + ## @brief This is the version counter for transform job reports + # any changes to the format @b must be reflected by incrementing this + _reportVersion = '0.9.6' + _metadataKeyMap = {'AMIConfig': 'AMI', } + + ## @brief Constructor + # @param parentTrf Mandatory link to the transform this job report represents + def __init__(self, parentTrf): + self._trf = parentTrf + + + ## @brief compress the given filereport dict + # @param fileDict the dict to compress + def _compressFileDict(self, fileDict): + resultDict = {} + + for fileType, fileList in fileDict.iteritems(): + # check if compression makes sense + if len(fileList) > 1: + # which keys need to be ignored definitely?? + keyIgnoreList = ['nentries', 'size'] + compressionHappend = False + compressedDict = {'common' : {}, + 'files' : fileList} + + # iterate over all keys in first file, if they are not in here, why look int he others?? 
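+                # Schematically, the compression rewrites (values illustrative):
+                #     [{'name': 'f1.AOD', 'type': 'AOD', 'dataset': 'D1'},
+                #      {'name': 'f2.AOD', 'type': 'AOD', 'dataset': 'D1'}]
+                # as:
+                #     {'common': {'type': 'AOD', 'dataset': 'D1'},
+                #      'files': [{'name': 'f1.AOD'}, {'name': 'f2.AOD'}]}
+                # so that per-file entries keep only what really differs.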
+ file0 = fileList[0] + for k, v in file0.iteritems(): + if k in keyIgnoreList: + continue + + # there are already compressions for this subfile + if k == 'subFiles': + #special handling, now search in all subfile lists + keyCompressible = True + for checkFile in fileList[1:]: + if 'subFiles' not in checkFile.keys(): + keyCompressible = False + break + if not keyCompressible: + # not al have subFiles, leave all subFiles alone + continue + + subFilesDict0 = file0['subFiles'] + for entry in subFilesDict0: + for k, v in entry.iteritems(): + subFileKeyCompressible = True + localCommonKeyToDelete = [] + for checkFile in fileList[1:]: + if 'common' in checkFile.keys(): + # we do not need to look up in subFiles + if k not in checkFile['common'].keys(): + #it is not in common, so it CAN NOT be in all the subfiles, we can break + subFileKeyCompressible = False + break + else: + for subFileEntry in checkFile['subFiles']: + if k not in subFileEntry.keys(): + subFileKeyCompressible = False + break + if str(v) != str(subFileEntry[k]): + subFileKeyCompressible = False + break + + if subFileKeyCompressible: + compressionHappend = True + compressedDict['common'][k] = subFilesDict0[0][k] + # end of subfiles + continue + if k == 'common': + #special handling, now search in all subfile lists + commonKeyCompressible = True + localCommonKeyToDelete = [] + + commonDict0 = file0['common'] + for k, v in commonDict0.iteritems(): + for checkFile in fileList[1:]: + if 'common' in checkFile.keys(): + # we do not need to look up in subFiles + if k not in checkFile['common'].keys(): + #it is not in common, so it CAN NOT be in all the subfiles, we can break + commonKeyCompressible = False + break + else: + localCommonKeyToDelete.append(checkFile) + else: + for subFileEntry in checkFile['subFiles']: + if k not in subFileEntry.keys(): + commonKeyCompressible = False + break + if str(v) != str(subFileEntry[k]): + commonKeyCompressible = False + break + + if k in checkFile.keys(): + if str(v) != str(subFileEntry[k]): + commonKeyCompressible = False + break + + if commonKeyCompressible: + compressionHappend = True + compressedDict['common'][k] = commonDict0[k] + # end of common + continue + + # this is the normal case for all keys not in common or subfiles + keyCompressible = True + inCommon = True + inSubFiles = True + for checkFile in fileList[1:]: + if 'common' in checkFile.keys(): + if k in checkFile['common'].keys(): + + if str(file0[k]) != str(checkFile['common'][k]): + inCommon = False + else: + inCommon = False + else: + inCommon = False + + if 'subFiles' in checkFile.keys(): + # has to be in all subfiles + for subFile in checkFile['subFiles']: + if k not in subFile.keys(): + inSubFiles = False + break + else: + if str(file0[k]) != str(subFile[k]): + inSubFiles = False + break + + if k in checkFile.keys(): + if file0[k] != checkFile[k]: + keyCompressible = False + else: + keyCompressible = False + + if keyCompressible or inCommon or inSubFiles: + compressionHappend = True + compressedDict['common'][k] = v + + # restore old layout + if not compressionHappend: + resultDict[fileType] = fileList + else: + # clean all keys that are compressible + resultDict[fileType] = compressedDict + for key in compressedDict['common'].keys(): + for fileEntry in fileList: + if key in fileEntry.keys(): + del fileEntry[key] + continue + if 'common' in fileEntry.keys(): + if key in fileEntry['common'].keys(): + del fileEntry['common'][key] + continue + for subFileEntry in fileEntry['subFiles']: + if key in subFileEntry.keys(): + del 
subFileEntry[key] + else: + resultDict[fileType] = fileList + + return resultDict + + ## @brief generate the python transform job report + # @param type Then general type of this report (e.g. fast) + # @param fileReport Dictionary giving the type of report to make for each type of file. + # This dictionary has to have all io types as keys and valid values are: + # @c None - skip this io type; @c 'full' - Provide all details; @c 'name' - only dataset and + # filename will be reported on. + # @param machineReport Boolean as to whether to include machine information + def python(self, fast = False, fileReport = defaultFileReport, machineReport = False): + myDict = {'name': self._trf.name, + 'reportVersion': self._reportVersion, + 'cmdLine': ' '.join(shQuoteStrings(sys.argv)), + 'exitAcronym': trfExit.codeToName(self._trf.exitCode), + 'exitCode': self._trf.exitCode, + 'exitMsg': self._trf.exitMsg, + 'created': isodate(), + } + + # Iterate over argValues... + myDict['argValues'] = {} + for k, v in self._trf.argdict.iteritems(): + if isinstance(v, trfArgClasses.argument): + myDict['argValues'][k] = v.value + else: + myDict['argValues'][k] = v + + # Iterate over files + myDict['files'] = {} + for fileType in ('input', 'output', 'temporary'): + if fileReport[fileType]: + myDict['files'][fileType] = [] + # Should have a dataDictionary, unless something went wrong very early... + for dataType, dataArg in self._trf._dataDictionary.iteritems(): + if fileReport[dataArg.io]: + entry = {"type": dataType} + entry.update(trfFileReport(dataArg).python(fast = fast, type = fileReport[dataArg.io])) + myDict['files'][dataArg.io].append(entry) + + try: + saveFilesDict = deepcopy(myDict['files']) + myDict['files'] = self._compressFileDict(myDict['files']) + except Exception, e: + msg.warning('Exception raised during file dictionary compression: {0}'.format(e)) + myDict['files'] = saveFilesDict + + # We report on all executors, in execution order + myDict['executor'] = [] + if hasattr(self._trf, '_executorPath'): + for executionStep in self._trf._executorPath: + myDict['executor'].append(trfExecutorReport(self._trf._executorDictionary[executionStep['name']]).python(fast = fast)) + + # By default we do not include the machine report - delegated to pilot/Tier 0 + if machineReport: + myDict['machine'] = machineReport().python(fast = fast) + + # Resource consumption + reportTime = os.times() + + # Calculate total cpu time we used - + myCpuTime = reportTime[0] + reportTime[1] + childCpuTime = reportTime[2] + reportTime[3] + wallTime = reportTime[4] - self._trf.transformStart[4] + msg.debug('Raw cpu resource consumption: transform {0}, children {1}'.format(myCpuTime, childCpuTime)) + # Reduce childCpuTime by times reported in the executors + for exeReport in myDict['executor']: + if isinstance(exeReport['resource']['cpuTime'], (float, int, long)): + msg.debug('Subtracting {0}s time for executor {1}'.format(exeReport['resource']['cpuTime'], exeReport['name'])) + childCpuTime -= exeReport['resource']['cpuTime'] + + myDict['resource'] = {'transformCpuTime': int(myCpuTime + 0.5), + 'externalsCpuTime': int(childCpuTime + 0.5), + 'wallTime': int(wallTime + 0.5), + 'cpuUnit': 'seconds', + 'memUnit': 'kB'} # Not clear if this is 10^3 or 2^10 + + return myDict + + ## @brief Classic metadata.xml report + def classicEltree(self, fast = False): + trfTree = ElementTree.Element('POOLFILECATALOG') + # Extract some executor parameters here + for exeKey in ('preExec', 'postExec', 'preInclude', 'postInclude'): + if exeKey in 
self._trf.argdict: + for substep, pyfrag in self._trf.argdict[exeKey].value.iteritems(): + if substep is 'all': + ElementTree.SubElement(trfTree, 'META', type = 'string', name = exeKey, value = str(pyfrag)) + else: + ElementTree.SubElement(trfTree, 'META', type = 'string', name = exeKey + '_' + substep, value = str(pyfrag)) + for exeKey in ('autoConfiguration', 'AMIConfig', 'AMITag'): + if exeKey in self._trf.argdict: + if exeKey in self._metadataKeyMap: + classicName = self._metadataKeyMap[exeKey] + else: + classicName = exeKey + ElementTree.SubElement(trfTree, 'META', type = 'string', name = classicName, + value = str(self._trf.argdict[exeKey].value)) + + # Now add information about output files + for dataType, dataArg in self._trf._dataDictionary.iteritems(): + if dataArg.io == 'output': + for fileEltree in trfFileReport(dataArg).classicEltreeList(fast = fast): + trfTree.append(fileEltree) + + return trfTree + + ## @brief Classic Tier 0 metadata python object + # @details Metadata in python nested dictionary form, which will produce a Tier 0 + # .gpickle when pickled + def classicPython(self, fast = False): + # Things we can get directly from the transform + trfDict = {'jobInputs' : [], # Always empty? + 'jobOutputs' : [], # Filled in below... + 'more' : {'Machine' : 'unknown'}, + 'trfAcronym' : trfExit.codeToName(self._trf.exitCode), + 'trfCode' : self._trf.exitCode, + 'trfExitCode' : self._trf.exitCode, + } + + if self._trf.lastExecuted is not None: + trfDict.update({'athAcronym' : self._trf.lastExecuted.errMsg, + 'athCode' : self._trf.lastExecuted.rc}) + + + # Emulate the NEEDCHECK behaviour + if hasattr(self._trf, '_executorPath'): + for executor in self._trf._executorPath: + if hasattr(executor, '_logScan') and self._trf.exitCode is 0: + if executor._logScan._levelCounter['FATAL'] > 0 or executor._logScan._levelCounter['CRITICAL'] > 0: + # This should not happen! + msg.warning('Found FATAL/CRITICAL errors and exit code 0 - reseting to TRF_LOGFILE_FAIL') + self._trf.exitCode = trfExit.nameToCode('TRF_LOGFILE_FAIL') + trfDict['trfAcronym'] = 'TRF_LOGFILE_FAIL' + elif executor._logScan._levelCounter['ERROR'] > 0: + msg.warning('Found errors in logfile scan - changing exit acronymn to NEEDCHECK.') + trfDict['trfAcronym'] = 'NEEDCHECK' + + # Now add files + fileArgs = self._trf.getFiles(io = 'output') + for fileArg in fileArgs: + # N.B. In the original Tier 0 gpickles there was executor + # information added for each file (such as autoConfiguration, preExec). + # However, Luc tells me it is ignored, so let's not bother. 
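+            # For orientation, the gpickle this builds has roughly the shape
+            # (values illustrative):
+            #     {'prodsys': {'trfAcronym': 'OK', 'trfCode': 0, 'trfExitCode': 0,
+            #                  'jobInputs': [], 'jobOutputs': [<file dicts>],
+            #                  'nevents': 5000, 'more': {'Machine': 'unknown'}}}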
+ trfDict['jobOutputs'].extend(trfFileReport(fileArg).classicPython(fast = fast)) + # AMITag and friends is added per-file, but it's known only to the transform, so set it here: + for argdictKey in ('AMITag', 'autoConfiguration',): + if argdictKey in self._trf.argdict: + trfDict['jobOutputs'][-1]['more']['metadata'][argdictKey] = self._trf.argdict[argdictKey].value + # Mangle substep argumemts back to the old format + for substepKey in ('preExec', 'postExec', 'preInclude', 'postInclude'): + if substepKey in self._trf.argdict: + for substep, values in self._trf.argdict[substepKey].value.iteritems(): + if substep is 'all': + trfDict['jobOutputs'][-1]['more']['metadata'][substepKey] = values + else: + trfDict['jobOutputs'][-1]['more']['metadata'][substepKey + '_' + substep] = values + + # Now retrieve the input event count + nentries = 'UNKNOWN' + for fileArg in self._trf.getFiles(io = 'input'): + thisArgNentries = fileArg.nentries + if isinstance(thisArgNentries, (int, long)): + if nentries is 'UNKNOWN': + nentries = thisArgNentries + elif thisArgNentries != nentries: + msg.warning('Found a file with different event count than others: {0} != {1} for {2}'.format(thisArgNentries, nentries, fileArg)) + # Take highest number? + if thisArgNentries > nentries: + nentries = thisArgNentries + trfDict['nevents'] = nentries + + # Tier 0 expects the report to be in a top level dictionary under the prodsys key + return {'prodsys' : trfDict} + + +## @brief Class to contain metadata for an executor +class trfExecutorReport(object): + def __init__(self, executor): + self._exe = executor + + ## @brief Get a python representation of executor report + # @detail Returns the python representation of this executor + # @return Python dictionary + def python(self, fast = False): + reportDict = {'name': self._exe.name, + 'rc' : self._exe.rc, + 'validation' : self._exe.isValidated, + 'statusOK' : self._exe.hasExecuted and self._exe.isValidated and (not bool(self._exe.rc)), + 'errMsg': self._exe.errMsg, + 'exeConfig' : {} + } + # Add executor config information + for k, v in self._exe.extraMetadata.iteritems(): + reportDict['exeConfig'][k] = v + + # Do we have a logscan to add? 
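+        # (If there is one, it is attached below under 'logfileReport' and
+        # 'metaData'.) A completed executor report then has roughly this shape
+        # (values illustrative):
+        #     {'name': 'RAWtoESD', 'rc': 0, 'validation': True, 'statusOK': True,
+        #      'errMsg': '', 'exeConfig': {},
+        #      'resource': {'cpuTime': 1234, 'wallTime': 1250}}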
+ if hasattr(self._exe, '_logScan'): + reportDict['logfileReport'] = self._exe._logScan.python + reportDict['metaData'] = self._exe._logScan._metaData + + # Asetup information + if hasattr(self._exe, '_asetup'): + reportDict['asetup'] = self._exe._asetup + + # Resource consumption + reportDict['resource'] = {'cpuTime': self._exe.cpuTime, 'wallTime': self._exe.wallTime, } + if self._exe.memStats: + reportDict['resource'].update(self._exe.memStats) + + return reportDict + + +## @brief Class to contain metadata for file types +class trfFileReport(object): + # Class variable with the mapping from internal (usually = AthFile key) metadata keys to classic keys + # In the case of 'size' it's the same, but it's convenient to list it here to be able to just loop over dictionary + _internalToClassicMap = {'conditions_tag' : 'conditionsTag', + 'beam_type' : 'beamType', + 'geometry' : 'geometryVersion', + 'nentries' : 'events', + 'file_size': 'size', + } + # Similar for Tier 0 file metadata keys + _internalToGpickleMap = {'file_guid' : 'GUID', + 'checkSum' : 'checkSum', # We don't have this, but Tier 0 want it in the dictionary + 'nentries' : 'events', + 'file_size' : 'size', + } + # Keep a separate map for the data which Tier 0 gets in the 'more : {}' dictionary + _internalToGpickleMoreMap = {'beam_type' : 'beamType', + 'conditions_tag' : 'conditionsTag', + 'geometry' : 'geometryVersion', + } + + ## @brief Constructor + # @param @c fileArg argFile object which this report represents + def __init__(self, fileArg): + self._fileArg = fileArg + + ## @brief Get a python representation of file report + # @detail Returns the python representation of this file report + # which is a list of dictionaries (a la Tier 0) + # @param type Report type (e.g., None = normal or fast) + # @param base How extensive to make the report: name or full + def python(self, fast = False, type = 'full'): + # First entity contains shared properties - same for all files in this argFile + if type is 'name': + fileArgProps = {'dataset': self._fileArg.dataset, + 'nentries': self._fileArg.getnentries(fast), + 'subFiles' : []} + elif type is 'full': + fileArgProps = {'dataset' : self._fileArg.dataset, + 'type' : self._fileArg.type, + 'subFiles' : [], + 'argName' : self._fileArg.name, + } + else: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'Unknown file report type ({0}) in the file report for {1}'.format(type, self._fileArg)) + + ## @note We try to strip off the path when there are multiple files to be reported on, + # however we should not do this if any of the files share a basename or anything is + # in adifferent directory + uniqueBasenames = set([ os.path.basename(file) for file in self._fileArg.value ]) + uniqueDirectories = set([ os.path.dirname(os.path.relpath(os.path.normpath(file))) for file in self._fileArg.value ]) + if len(uniqueBasenames) != len(self._fileArg.value): + msg.info('Detected two files with the same basename in a file argument - report for file {0} will be produced with the path as a key'.format(file)) + basenameReport = False + elif len(uniqueDirectories) > 1: + msg.warning('Detected output files in different directories - report for file {0} will be produced with the path as a key'.format(file)) + basenameReport = False + else: + basenameReport = True + for file in self._fileArg.value: + if basenameReport: + fileArgProps['subFiles'].append(self.singleFilePython(file, fast = fast, type = type)) + else: + 
fileArgProps['subFiles'].append(self.singleFilePython(file, fast = fast, type = type, basename = False)) + + if type == 'full': + # move metadata to subFile dict, before it can be compressed + metaData = self._fileArg._fileMetadata + for fileName in metaData.keys(): + if basenameReport == False: + searchFileName = fileName + else: + searchFileName = os.path.basename(fileName) + + thisFile = None + for subFile in fileArgProps['subFiles']: + if subFile['name'] == searchFileName: + thisFile = subFile + break + + if thisFile is None: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'file metadata mismatch in subFiles dict') + + # append metadata keys, except all existing, to subfile dict and ignore _exists + for k, v in metaData[fileName].iteritems(): + if k not in thisFile.keys() and k != '_exists': + thisFile[k] = v + + # Try to compress subFile metadata + fileArgProps['common'] = {} + if len(self._fileArg.value) > 1: + fileList = fileArgProps['subFiles'] + mdKeys = fileList[0].keys() + for key in mdKeys: + # Certain keys we never 'compress' + if key in ('nentries', 'size', '_exists'): + continue + compressible = True + + file0 = fileList[0] + for file in fileList[1:]: + if key in file.keys(): + if file0[key] != file[key]: + compressible = False + break + else: + compressible = False + break + + msg.debug('key: >>%s<< -->> compressible: %r' % (key, compressible)) + if compressible: + fileArgProps['common'][key] = file0[key] + for file in fileList: + del file[key] + + # remove common if its empty + if len(fileArgProps['common'].keys()) == 0: + del fileArgProps['common'] + + return fileArgProps + + ## @brief Return unique metadata for a single file in an argFile class + # @param filename File for which report should be generated + # @param type Type of report to produce + # @param fast Generate a fast report, i.e., no external matadata calls + # @return Python report dictionary + def singleFilePython(self, filename, fast = False, type = 'full', basename = True): + if filename not in self._fileArg.value: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg)) + if basename: + entry = {'name': os.path.basename(filename)} + else: + entry = {'name': os.path.relpath(os.path.normpath(filename))} + if type is 'name': + # For 'name' we return only the GUID + entry.update(self._fileArg.getMetadata(files = filename, populate = not fast, metadataKeys = ['file_guid'])[filename]) + elif type is 'full': + # Suppress io because it's the key at a higher level and _exists because it's internal + entry.update(self._fileArg.getMetadata(files = filename, populate = not fast, maskMetadataKeys = ['io', '_exists'])[filename]) + else: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'Unknown file report type ({0}) in the file report for {1}'.format(type, self._fileArg)) + + return entry + + + ## @brief Return an element tree representation of the classic file report + # @detail Returns a list of eltree entities giving the classic metadata of a files + # for this argument. 
This is a list so that it can be attached as a + # series of nodes into the final PFC description of the job + # @return List of ElementTree.Element objects + def classicEltreeList(self, fast = False): + treeList = [] + for file in self._fileArg.value: + treeList.append(self.classicSingleEltree(file, fast = fast)) + + return treeList + + + ## @brief Return file metadata in 'classic' POOLFILECATALOG format + # @param filename Filename to generate Element object for + # @param fast Prevent external metadata calls if true + def classicSingleEltree(self, filename, fast = False): + if filename not in self._fileArg.value: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg)) + tree = ElementTree.Element('File', ID = str(self._fileArg.getSingleMetadata(file = filename, metadataKey = 'file_guid', populate = not fast))) + logical = ElementTree.SubElement(tree, 'logical') + lfn = ElementTree.SubElement(logical, 'lfn', name = filename) + for myKey, classicKey in self._internalToClassicMap.iteritems(): + # beam_type is tricky - we return only the first list value, + # (but remember, protect against funny stuff!) + if myKey is 'beam_type': + beamType = self._fileArg.getSingleMetadata(file = filename, metadataKey = myKey, populate = not fast) + if isinstance(beamType, list): + if len(beamType) is 0: + ElementTree.SubElement(tree, 'metadata', att_name = classicKey, att_value = '') + else: + ElementTree.SubElement(tree, 'metadata', att_name = classicKey, att_value = str(beamType[0])) + else: + # This is really not normal, but best we can do is str conversion + ElementTree.SubElement(tree, 'metadata', att_name = classicKey, att_value = str(beamType)) + else: + ElementTree.SubElement(tree, 'metadata', att_name = classicKey, + att_value = str(self._fileArg.getSingleMetadata(file = filename, metadataKey = myKey, populate = not fast))) + # Now add the metadata which is stored at the whole argument level + ElementTree.SubElement(tree, 'metadata', att_name = 'fileType', att_value = str(self._fileArg.type)) + if self._fileArg.dataset is not None: + ElementTree.SubElement(tree, 'metadata', att_name = 'dataset', att_value = self._fileArg.dataset) + + return tree + + ## @brief Return file metadata in classic Tier 0 python style + # @param fast Prevent external metadata calls if true + # @return List of file metadata dictionaries + def classicPython(self, fast = False): + fileList = [] + for file in self._fileArg.value: + fileList.append(self.classicSinglePython(file, fast = fast)) + return fileList + + ## @brief Return single file metadata in classic Tier 0 python style + # @param fast Prevent external metadata calls if true + # @param @c filename Filename to generate Element object for + def classicSinglePython(self, filename, fast = False): + if filename not in self._fileArg.value: + raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), + 'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg)) + # Direct population of some keys + fileDict = {'lfn' : filename, + 'dataset' : self._fileArg.dataset, + } + # Fill in the mapped 'primary' keys + for myKey, classicKey in self._internalToGpickleMap.iteritems(): + fileDict[classicKey] = self._fileArg.getSingleMetadata(file = filename, metadataKey = myKey, populate = not fast) + if classicKey is 'checkSum' and fileDict[classicKey] is 'UNDEFINED': + # Old style is that we give back None 
when we don't know + fileDict[classicKey] = None + elif fileDict[classicKey] is 'UNDEFINED': + # Suppress things we don't generally expect to know + del fileDict[classicKey] + # Base 'more' stuff which is known by the argFile itself + fileDict['more'] = {'metadata' : {'fileType' : self._fileArg.type}} + for myKey, classicKey in self._internalToGpickleMoreMap.iteritems(): + value = self._fileArg.getSingleMetadata(file = filename, metadataKey = myKey, populate = not fast) + if value is not 'UNDEFINED': + fileDict['more']['metadata'][classicKey] = value + + return fileDict + + +## @brief Report on the machine where we ran +class machineReport(object): + + def python(self, fast = False): + machine = {} + # Get the following from the platform module + attrs = ['architecture', 'machine', 'node', 'platform', 'processor', 'python_version', 'uname', 'linux_distribution'] + for attr in attrs: + try: + machine[attr] = getattr(platform, attr).__call__() + except AttributeError, e: + msg.error('Failed to get "{0}" attribute from platform module: {1}'.format(attr, e)) + return machine + + +## @brief Small helper function to extract a per-datatype dictionary from a job report of lists +def pyJobReportToFileDict(jobReport, io = 'all'): + dataDict = {} + if 'files' not in jobReport: + msg.warning('Job report has no "files" section') + return dataDict + for iotype in jobReport['files']: + if io is 'all' or io == iotype: + for filedata in jobReport['files'][iotype]: + dataDict[filedata['type']] = filedata + return dataDict diff --git a/Tools/PyJobTransforms/python/trfSignal.py b/Tools/PyJobTransforms/python/trfSignal.py new file mode 100755 index 0000000000000000000000000000000000000000..cc65600fbca3c559c3f114328ae9e2571a28030e --- /dev/null +++ b/Tools/PyJobTransforms/python/trfSignal.py @@ -0,0 +1,54 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfSignal +# +# @brief Signal handling utilities for ATLAS job transforms +# @details Signal handling for ATLAS job transforms +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfSignal.py 609252 2014-07-29 16:20:33Z wbreaden $ +# + +import signal +import unittest + +import logging +msg = logging.getLogger(__name__) + +_defaultSignalHandlerDict = {} + +## @brief Install common handler for various signals. +# @details All existing signal handlers are saved in the @em _trfSignalHandlerDict dictionary to allow for them to be restored. +# @param handler Common signal handler for all signals concerned. +# @return None +def setTrfSignalHandlers(handler): + xDict = {} + if not _defaultSignalHandlerDict: # default handlers have not been saved + xDict = _defaultSignalHandlerDict + # N.B. Change from old transforms - do not catch SIGINT (^C). This is delt with more cleanly via + # the stdTrfExceptionHandler wrapper which handles the KeyboardInterrupt exception. + for s in ['SIGABRT', 'SIGFPE', 'SIGBUS', 'SIGHUP', 'SIGILL', 'SIGIO', 'SIGPIPE', 'SIGQUIT', 'SIGSEGV', 'SIGSYS', 'SIGXCPU', 'SIGXFSZ']: + try: + msg.debug("Setting signalhandler for %s to %s" % (s, handler)) + xDict[s] = signal.signal(getattr(signal, s), handler) + except Exception, e: + msg.error("Unable to attach custom signal handler to %s: %s" % (s, e)) + continue + return xDict + +## @brief Restore signal handlers to the default ones +# @details Handlers are restored from @em _defaultSignalHandlerDict dictionary. +# @warning Signal handling in Python is not 100% implemented. Certain signal (even those listed below) cannot be caught. 
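+# Typical usage brackets a risky section (a sketch; myHandler and runJob are
+# placeholders, not part of this module):
+#     oldHandlers = setTrfSignalHandlers(myHandler)
+#     try:
+#         runJob()
+#     finally:
+#         resetTrfSignalHandlers()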
+# @return None
+def resetTrfSignalHandlers():
+    if _defaultSignalHandlerDict:
+        currentTRFSignalHandler = signal.getsignal(signal.SIGTERM)
+    else:
+        currentTRFSignalHandler = signal.SIG_DFL
+    for s in ['SIGABRT', 'SIGFPE', 'SIGBUS', 'SIGHUP', 'SIGILL', 'SIGIO', 'SIGPIPE', 'SIGQUIT', 'SIGSEGV', 'SIGSYS', 'SIGTERM', 'SIGXCPU', 'SIGXFSZ']:
+        try:
+            signal.signal(getattr(signal, s), _defaultSignalHandlerDict.get(s, signal.SIG_DFL))
+        except Exception, e:
+            msg.error("Unable to restore default signal handler for %s: %s" % (s, e))
+            continue
+    return currentTRFSignalHandler
diff --git a/Tools/PyJobTransforms/python/trfUtils.py b/Tools/PyJobTransforms/python/trfUtils.py
new file mode 100644
index 0000000000000000000000000000000000000000..93760337b14d321b34d4a02119912c127235769f
--- /dev/null
+++ b/Tools/PyJobTransforms/python/trfUtils.py
@@ -0,0 +1,1161 @@
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @package PyJobTransforms.trfUtils
+# @brief Transform utility functions
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfUtils.py 609252 2014-07-29 16:20:33Z wbreaden $
+
+import os
+import os.path
+import re
+import signal
+import sys
+import tarfile
+import time
+import unittest
+import uuid
+
+import multiprocessing
+import inspect
+import base64
+
+from datetime import datetime
+from subprocess import Popen, STDOUT, PIPE, CalledProcessError
+from xml.dom import minidom
+from xml.parsers.expat import ExpatError
+from xml.etree import ElementTree
+
+from PyJobTransforms.trfExitCodes import trfExit
+import PyJobTransforms.trfExceptions as trfExceptions
+
+import logging
+msg = logging.getLogger(__name__)
+
+
+## @brief Find a named file along a colon separated PATH
+# @details Note will also work for finding directories
+# @return Full path to file or @c None if the file is not found
+def findFile(path, file):
+    # First see if the file already includes a path
+    msg.debug('Finding full path for %s\nSearching along %s' % (file, path))
+    if file.startswith('/'):
+        return file
+
+    # Split path
+    pathElements = path.split(':')
+    for pathElement in pathElements:
+        if os.path.exists(os.path.join(pathElement, file)):
+            return os.path.join(pathElement, file)
+
+    return None
+
+
+## @brief List all processes and parents and form a dictionary where the
+# parent key lists all child PIDs
+# @param listMyOrphans If this is @c True, then processes which share the same
+# @c pgid as this process and have parent PID=1 (i.e., init) get added to this process's children,
+# which allows these orphans to be added to the kill list. N.B. this means
this means +# that orphans have two entries - as child of init and a child of this +# process +def getAncestry(listMyOrphans = False): + psCmd = ['ps', 'ax', '-o', 'pid,ppid,pgid,args', '-m'] + + try: + msg.debug('Executing %s' % psCmd) + p = Popen(psCmd, stdout=PIPE, stderr=PIPE) + stdout = p.communicate()[0] + psPID = p.pid + except OSError, e: + msg.error('Failed to execute "ps" to get process ancestry: %s' % repr(e)) + raise + + childDict = {} + myPgid = os.getpgrp() + myPid = os.getpid() + for line in stdout.split('\n'): + try: + (pid, ppid, pgid, cmd) = line.split(None, 3) + pid = int(pid) + ppid = int(ppid) + pgid = int(pgid) + # Ignore the ps process + if pid == psPID: + continue + if ppid in childDict: + childDict[ppid].append(pid) + else: + childDict[ppid] = [pid] + if listMyOrphans and ppid == 1 and pgid == myPgid: + msg.info("Adding PID {0} to list of my children as it seems to be orphaned: {1}".format(pid, cmd)) + if myPid in childDict: + childDict[myPid].append(pid) + else: + childDict[myPid] = [pid] + + except ValueError: + # Not a nice line + pass + return childDict + +## @brief Find all the children of a particular PID (calls itself recursively to descend into each leaf) +# @note The list of child PIDs is reversed, so the grandchildren are listed before the children, etc. +# so signaling left to right is correct +# @param psTree The process tree returned by @c trfUtils.listChildren(); if None then @c trfUtils.listChildren() is called internally. +# @param parent The parent process for which to return all the child PIDs +# @param listOrphans Parameter value to pass to getAncestry() if necessary +# @return @c children List of child PIDs +def listChildren(psTree = None, parent = os.getpid(), listOrphans = False): + '''Take a psTree dictionary and list all children''' + if psTree == None: + psTree = getAncestry(listMyOrphans = listOrphans) + + msg.debug("List children of %d (%s)" % (parent, psTree.get(parent, []))) + children = [] + if parent in psTree: + children.extend(psTree[parent]) + for child in psTree[parent]: + children.extend(listChildren(psTree, child)) + children.reverse() + return children + + +## @brief Kill all PIDs +# @note Even if this function is used, subprocess objects need to join() with the +# child to prevent it becoming a zombie +# @param childPIDs Explicit list of PIDs to kill; if absent then listChildren() is called +# @param sleepTime Time between SIGTERM and SIGKILL +# @param message Boolean if messages should be printed +# @param listOrphans Parameter value to pass to getAncestry(), if necessary (beware, killing +# orphans is dangerous, you may kill "upstream" processes; Caveat Emptor) +def infanticide(childPIDs = None, sleepTime = 3, message = True, listOrphans = False): + if childPIDs is None: + childPIDs = listChildren(listOrphans = listOrphans) + + if len(childPIDs) > 0 and message: + msg.info('Killing these child processes: {0}...'.format(childPIDs)) + + for pid in childPIDs: + try: + os.kill(pid, signal.SIGTERM) + except OSError: + pass + + time.sleep(sleepTime) + + for pid in childPIDs: + try: + os.kill(pid, signal.SIGKILL) + except OSError: + # OSError happens when the process no longer exists - harmless + pass + + +def call(args, bufsize=0, executable=None, stdin=None, preexec_fn=None, close_fds=False, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0, message="", logger=msg, loglevel=None, timeout=None, retry=2, timefactor=1.5, sleeptime=10): + + def logProc(p): + line=p.stdout.readline() + if 
line: + line="%s%s" % (message, line.rstrip()) + if logger is None: + print line + else: + logger.log(loglevel, line) + + def flushProc(p): + line=p.stdout.readline() + while line: + line="%s%s" % (message, line.strip()) + if logger is None: + print line + else: + logger.log(loglevel, line) + line=p.stdout.readline() + + if loglevel is None: + loglevel=logging.DEBUG + + if timeout is None or timeout<=0: # no timeout set + msg.info('Executing %s...' % args) + starttime = time.time() + p=Popen(args=args, bufsize=bufsize, executable=executable, stdin=stdin, stdout=PIPE, stderr=STDOUT, preexec_fn=preexec_fn, close_fds=close_fds, shell=shell, cwd=cwd, env=env, universal_newlines=universal_newlines, startupinfo=startupinfo, creationflags=creationflags) + while p.poll() is None: + logProc(p) + flushProc(p) + if timeout is not None: + msg.info('Executed call within %d s.' % (time.time()-starttime)) + return p.returncode + + else: #timeout set + n=0 + while n<=retry: + msg.info('Try %i out of %i (time limit %ss) to call %s.' % (n+1, retry+1, timeout, args)) + starttime = time.time() + endtime=starttime+timeout + p=Popen(args=args, bufsize=bufsize, executable=executable, stdin=stdin, stdout=PIPE, stderr=STDOUT, preexec_fn=preexec_fn, close_fds=close_fds, shell=shell, cwd=cwd, env=env, universal_newlines=universal_newlines, startupinfo=startupinfo, creationflags=creationflags) + while p.poll() is None and time.time()<endtime: + logProc(p) + if p.poll() is None: + msg.warning('Timeout limit of %d s reached. Kill subprocess and its children.' % timeout) + parent=p.pid + pids=[parent] + pids.extend(listChildren(parent=parent)) + infanticide(pids) + msg.info('Checking if something is left in buffer.') + flushProc(p) + if n!=retry: + msg.info('Going to sleep for %d s.' % sleeptime) + time.sleep(sleeptime) + n+=1 + timeout*=timefactor + sleeptime*=timefactor + else: + flushProc(p) + msg.info('Executed call within %d s.' % (time.time()-starttime)) + return p.returncode + + msg.warning('All %i tries failed!' % n) + raise Exception + + +## @brief Return a string with a report of the current athena setup +def asetupReport(): + setupMsg = os.linesep + for eVar in ('AtlasProject', 'AtlasVersion', 'AtlasPatch', 'AtlasPatchVersion', 'CMTCONFIG','TestArea'): + if eVar in os.environ: + setupMsg += '\t%s=%s\n' % (eVar, os.environ[eVar]) + else: + setupMsg+ '\t%s undefined\n' % eVar + # Look for patches so that the job can be rerun + if 'TestArea' in os.environ and os.access(os.environ['TestArea'], os.R_OK): + setupMsg += "\n\tPatch packages are:\n" + try: + cmd = ['cmt', 'show', 'packages', os.environ['TestArea']] + cmtProc = Popen(cmd, shell = False, stdout = PIPE, stderr = STDOUT, bufsize = 1) + cmtOut = cmtProc.communicate()[0] + for line in cmtOut.split('\n'): + try: + if line.strip() == '': + continue + (package, packageVersion, packagePath) = line.split() + setupMsg += '\t\t%s\n' % (packageVersion) + except ValueError: + setupMsg += "Warning, unusual output from cmt: %s\n" % line + except (CalledProcessError, OSError), e: + setupMsg += 'Execution of CMT failed: %s' % e + else: + setupMsg+= "No readable patch area found" + + return setupMsg.rstrip() + + +## @brief Test (to the best of our knowledge) if the current release is older +# than a major, minor version number +# @details There's no unambiguous reference to the release that encompasses +# all of the development builds (dev, devval, migs), but almost everything +# can be determined from @c AtlasVersion and @c AtlasBaseDir. 
If neither of +# those contain version information then we assume a development build +# that is @e new by definition (so we return @c False) +# @param major Major release number +# @param minor Minor release number (if not specified, will not be matched against) +# @return Boolean if current release is found to be older +def releaseIsOlderThan(major, minor=None): + if 'AtlasVersion' not in os.environ or 'AtlasBaseDir' not in os.environ: + msg.warning("Could not find 'AtlasVersion' and 'AtlasBaseDir' in the environment - no release match possible") + return False + try: + # First try AtlasVersion, which is clean + relRegExp = re.compile(r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<other>.*)') + relMatch = re.match(relRegExp, os.environ['AtlasVersion']) + if not relMatch: + # Now try the final part of AtlasBaseDir + leafDir = os.path.basename(os.environ['AtlasBaseDir']) + relMatch = re.match(relRegExp, leafDir) + if not relMatch: + msg.info('No identifiable numbered release found from AtlasVersion or AtlasBaseDir - assuming dev/devval/mig') + return False + + relmajor = int(relMatch.group('major')) + relminor = int(relMatch.group('minor')) + msg.info('Detected release major {0}, minor {1} (.{2}) from environment'.format(relmajor, relminor, relMatch.group('other'))) + + # Major beats minor, so test this first + if relmajor < major: + return True + if relmajor > major: + return False + + # First case is major equality and don't care about minor + if minor is None or relminor >= minor: + return False + return True + + except Exception, e: + msg.warning('Exception thrown when attempting to detect athena version ({0}). No release check possible'.format(e)) + return False + +## @brief Quote a string array so that it can be echoed back on the command line in a cut 'n' paste safe way +# @param strArray: Array of strings to quote +# @detail Technique is to first quote any pre-existing single quotes, then single quote all of the array +# elements so that the shell sees them as a single argument +def shQuoteStrings(strArray = sys.argv): + return [ "'" + qstring.replace("'", "\\'") + "'" for qstring in strArray ] + + +## @brief Generator to return lines and line count from a file +# @param filename: Filename to open and deliver lines from +# @param strip: If lines get stripped before being returned (default @c True) +# @note This is useful so that multiple parts of code can co-operatively take lines from the file +def lineByLine(filename, strip = True): + linecounter = 0 + f = open(filename, 'r') + for line in f: + linecounter += 1 + if strip: + line = line.strip() + yield line, linecounter + f.close() + + +## #brief XML pretty print an ElementTree.ELement object +# @param element ElementTree.ELement object to print +# @param indent Indent parameter for minidom toprettyxml method +# @param poolFileCatalogFormat Whether to reformat the XML as a classic POOLFILECATALOG document +# @return String with the pretty printed XML version +# @note This is rather a convoluted way to get the correct DOCTYPE +# set and there's probably a better way to do it, but as this +# is a deprecated way of delivering metadata upstream it's not +# worth improving at this stage. 
+def prettyXML(element, indent = ' ', poolFileCatalogFormat = False): + # Use minidom for pretty printing + # See http://broadcast.oreilly.com/2010/03/pymotw-creating-xml-documents.html + xmlstring = ElementTree.tostring(element, 'utf-8') + try: + metadataDoc = minidom.parseString(xmlstring) + except ExpatError: + # Getting weird \x00 NULLs on the end of some GUIDs, which minidom.parsestring does not like (is this APR?) + msg.warning('Error parsing ElementTree string - will try removing hex literals ({0!r})'.format(xmlstring)) + xmlstring = xmlstring.replace('\x00', '') + metadataDoc = minidom.parseString(xmlstring) + + + if poolFileCatalogFormat is False: + return metadataDoc.toprettyxml(indent=indent, encoding='UTF-8') + + # Now create a new document with the correct doctype for classic POOLFILECATALOG + # See http://stackoverflow.com/questions/2337285/set-a-dtd-using-minidom-in-python + imp = minidom.DOMImplementation() + doctype = imp.createDocumentType(qualifiedName='POOLFILECATALOG', publicId='', systemId='InMemory') + doc = imp.createDocument(None, 'POOLFILECATALOG', doctype) + + # Cut and paste the parsed document into the new one + # See http://stackoverflow.com/questions/1980380/how-to-render-a-doctype-with-pythons-xml-dom-minidom + refel = doc.documentElement + for child in metadataDoc.childNodes: + if child.nodeType==child.ELEMENT_NODE: + doc.replaceChild(doc.importNode(child, True), doc.documentElement) + refel= None + elif child.nodeType!=child.DOCUMENT_TYPE_NODE: + doc.insertBefore(doc.importNode(child, True), refel) + + return doc.toprettyxml(indent=indent, encoding='UTF-8') + + +## @brief Return isoformated 'now' string +# @detail Uses datetime.isoformat method, but suppressing microseconds +def isodate(): + return datetime.now().replace(microsecond=0).isoformat() + + +## @brief Strip a string down to alpha-numeric characters only +# @note This is used to force executor names and substep aliases +# to a form that the substep argument parser will recognise. +# None is still allowed as this is the default for "unset" in +# some cases. +def forceToAlphaNum(string): + if string == None or string.isalnum(): + return string + newstring = '' + for piece in string: + if piece.isalnum(): + newstring += piece + msg.warning("String {0} was stripped to alphanumeric characters only: {1}".format(string, newstring)) + return newstring + + +## @brief Compare metadata for files, but taking into account that GUID can vary +# @detail Compare metadata dictionaries, but allowing for differences in the file_guid property +# as this is generated randomly for file types without an intrinsic GUID +# @param metadata1 Filel metadata dictionary +# @param metadata2 File2 metadata dictionary +# @param giudCheck How to compare GUIDs. 
Valid values are:
+# - @c equal GUIDs must be the same
+# - @c valid GUIDs must be valid, but don't have to be the same
+# - @c ignore The file_guid key is ignored
+# @return True if metadata is the same, otherwise False
+def cmpMetadata(metadata1, metadata2, guidCheck = 'valid'):
+    # First check we have the same files
+    allFiles = set(metadata1.keys()) | set(metadata2.keys())
+    if len(allFiles) > len(metadata1.keys()) or len(allFiles) > len(metadata2.keys()):
+        msg.warning('In metadata comparison file lists are not equal - fails ({0} != {1})'.format(metadata1, metadata2))
+        return False
+    for file in allFiles:
+        allKeys = set(metadata1[file].keys()) | set(metadata2[file].keys())
+        if len(allKeys) > len(metadata1[file].keys()) or len(allKeys) > len(metadata2[file].keys()):
+            msg.warning('In metadata comparison key lists are not equal - fails')
+            return False
+        for key in allKeys:
+            if key == 'file_guid':
+                if guidCheck == 'ignore':
+                    continue
+                elif guidCheck == 'equal':
+                    if metadata1[file]['file_guid'].upper() == metadata2[file]['file_guid'].upper():
+                        continue
+                    else:
+                        msg.warning('In metadata comparison strict GUID comparison failed.')
+                        return False
+                elif guidCheck == 'valid':
+                    try:
+                        uuid.UUID(metadata1[file]['file_guid'])
+                        uuid.UUID(metadata2[file]['file_guid'])
+                        continue
+                    except ValueError:
+                        msg.warning('In metadata comparison found invalid GUID strings.')
+                        return False
+            if metadata1[file][key] != metadata2[file][key]:
+                msg.warning('In metadata comparison found different key values: {0!s} != {1!s}'.format(metadata1[file][key], metadata2[file][key]))
+                return False
+    return True
+
+
+## @brief Unpack a given tarfile
+# @param filename Tar file to unpack
+# @param directory Directory target for the unpacking
+def unpackTarFile(filename, directory="."):
+    try:
+        tar = tarfile.open(filename)
+        tar.extractall(path=directory)
+        tar.close()
+    except Exception, e:
+        errMsg = 'Error encountered while unpacking {0} to {1}: {2}'.format(filename, directory, e)
+        msg.error(errMsg)
+        raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_SETUP'), errMsg)
+
+
+## @brief Ensure that the DBRelease tarball has been unpacked
+# @detail Extract the dbversion number and look for an unpacked directory.
+# If found then this release is already set up. If not then try to unpack
+# the tarball.
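+# @par Example (a minimal sketch; the tarball name is hypothetical):
+# @code
+# unpacked, dbsetup = unpackDBRelease('DBRelease-22.7.1.tar.gz')
+# if unpacked:
+#     setupDBRelease(dbsetup)
+# @endcode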
+# @param tarball The tarball file
+# @param dbversion The version number (if not given then look at the tarball name to get it)
+# @throws trfExceptions.TransformSetupException If the DBRelease tarball is unreadable or the version is not understood
+# @return Two element tuple: (@c True if release was unpacked or @c False if release was already unpacked, dbsetup path)
+def unpackDBRelease(tarball, dbversion=None):
+    if dbversion == None:
+        dbdMatch = re.match(r'DBRelease-([\d\.]+)\.tar\.gz', os.path.basename(tarball))
+        if dbdMatch == None:
+            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'),
+                                                        'Could not find a valid version in the DBRelease tarball: {0}'.format(tarball))
+        dbversion = dbdMatch.group(1)
+    dbsetup = os.path.abspath(os.path.join("DBRelease", dbversion, "setup.py"))
+    if os.access(dbsetup, os.R_OK):
+        msg.debug('DBRelease {0} is already unpacked, found {1}'.format(tarball, dbsetup))
+        return False, dbsetup
+    else:
+        msg.debug('Will attempt to unpack DBRelease {0}'.format(tarball))
+        unpackTarFile(tarball)
+        msg.info('DBRelease {0} was unpacked'.format(tarball))
+        if not os.access(dbsetup, os.R_OK):
+            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'),
+                                                        'DBRelease setup file {0} was not readable, even after unpacking {1}'.format(dbsetup, tarball))
+        return True, dbsetup
+
+## @brief Run a DBRelease setup
+# @param setup DBRelease setup script location (absolute or relative path)
+# @return None
+def setupDBRelease(setup):
+    try:
+        dbdir=os.path.abspath(os.path.dirname(setup))
+        msg.debug('Will add {0} to sys.path to load DBRelease setup module'.format(dbdir))
+        # N.B. We cannot use __import__ because the X.Y.Z directory name is illegal for a python module path
+        opath = list(sys.path)  # take a copy, so the original path can be restored afterwards
+        sys.path.insert(0, dbdir)
+        from setup import Setup
+        # Instantiate the Setup module, which activates the customisation
+        setupObj = Setup(dbdir)
+        sys.path = opath
+        msg.debug('DBRelease setup module was initialised successfully')
+    except ImportError, e:
+        errMsg = 'Import error while trying to load DB Setup module: {0}'.format(e)
+        msg.error(errMsg)
+        raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'), errMsg)
+    except Exception, e:
+        errMsg = 'Unexpected error while trying to load DB Setup module: {0}'.format(e)
+        msg.error(errMsg)
+        raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'), errMsg)
+
+
+## @brief Validate that a DBRelease exists on cvmfs and return the path to the setup script
+# @param dbrelease The DBRelease number (X.Y.Z[.A]) or "current"
+# @throws trfExceptions.TransformSetupException If the DBRelease setup is unreadable or the dbrelease parameter is not understood
+# @return Path to setup.py script for this DBRelease
+def cvmfsDBReleaseCheck(dbrelease):
+    dbsetup = None
+    dbdMatch = re.match(r'([\d\.]+|current)$', dbrelease)
+    if dbdMatch:
+        if 'VO_ATLAS_SW_DIR' in os.environ:
+            msg.debug('Found site defined path to ATLAS software: {0}'.format(os.environ['VO_ATLAS_SW_DIR']))
+            dbsetup = os.path.join(os.environ['VO_ATLAS_SW_DIR'], 'database', 'DBRelease', dbrelease, 'setup.py')
+            if os.access(dbsetup, os.R_OK):
+                return dbsetup
+            msg.warning('Site defined path to ATLAS software seems invalid (failed to access {0}). 
Will also try standard cvmfs path.'.format(dbsetup)) + else: + msg.debug('Using standard CVMFS path to ATLAS software') + + dbsetup = os.path.join('/cvmfs/atlas.cern.ch/repo/sw/database/DBRelease', dbrelease, 'setup.py') + if not os.access(dbsetup, os.R_OK): + raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'), + 'CVMFS DBRelease setup file {0} was not readable'.format(dbsetup)) + msg.debug('Using cvmfs based dbrelease: {0}'.format(os.path.dirname(dbsetup))) + else: + raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'), + 'Unable to interpret DBRelease "{0}" as either a tarball or a CVMFS release directory'.format(dbrelease)) + return dbsetup + + +## @brief Dump a list of arguments to the pickle file given in the 'dumpPickle' argument +def pickledDump(argdict): + if 'dumpPickle' not in argdict: + return + + from PyJobTransforms.trfArgClasses import argument + theArgumentDictionary = {} + for k, v in argdict.iteritems(): + if k is 'dumpPickle': + continue + if isinstance(v, argument): + theArgumentDictionary[k] = v.value + else: + theArgumentDictionary[k] = v + with open(argdict['dumpPickle'], 'w') as pickleFile: + import cPickle as pickle + pickle.dump(theArgumentDictionary, pickleFile) + + +## @brief Dump a list of arguments to the JSON file given in the 'dumpJSON' argument +def JSONDump(argdict): + if 'dumpJSON' not in argdict: + return + + from PyJobTransforms.trfArgClasses import argument + theArgumentDictionary = {} + for k, v in argdict.iteritems(): + if k is 'dumpJSON': + continue + if isinstance(v, argument): + theArgumentDictionary[k] = v.value + else: + theArgumentDictionary[k] = v + with open(argdict['dumpJSON'], 'w') as JSONFile: + import json + json.dump(theArgumentDictionary, JSONFile, sort_keys=True, indent=2) + +## @brief Recursively convert unicode to str, useful when we have just loaded something +# from json (TODO: make the transforms happy with unicode as well as plain str!) +def convertToStr(input): + if isinstance(input, dict): + return dict([(convertToStr(key), convertToStr(value)) for key, value in input.iteritems()]) + #return {convertToStr(key): convertToStr(value) for key, value in input.iteritems()} + elif isinstance(input, list): + return [convertToStr(element) for element in input] + elif isinstance(input, unicode): + return input.encode('utf-8') + else: + return input + + +## @brief Convert a command line option to the dictionary key that will be used by argparse +def cliToKey(option): + return option.lstrip('-').replace('-', '_') + + +## @brief print in a human-readable way the items of a given object +# @detail This function prints in a human-readable way the items of a given +# object. +# @param object to print +def printHR(object): + # dictionary + if isinstance(object, dict): + for key, value in sorted(object.items()): + print u'{key}: {value}'.format(key = key, value = value) + # list or tuple + elif isinstance(object, list) or isinstance(object, tuple): + for element in object: + print element + # other + else: + print object + + +## @brief return a URL-safe, base 64-encoded pseudorandom UUID +# @detail This function returns a URL-safe, base 64-encoded pseudorandom +# Universally Unique IDentifier (UUID). 
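+# @par Example (the value shown is illustrative; every call returns a fresh identifier):
+# @code
+# uniqueIdentifier()   # e.g. 'rm9lBAh-QGuTkXAIUgC2Vw' (22 URL-safe characters)
+# @endcode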
+# @return string of URL-safe, base 64-encoded pseudorandom UUID +def uniqueIdentifier(): + return str(base64.urlsafe_b64encode(uuid.uuid4().bytes).strip("=")) + + +## @brief return either singular or plural units as appropriate for a given +# quantity +# @detail This function returns either singular or plural units as appropriate +# for a given quantity. So, a quantity of 1 would cause the return of singular +# units and a quantity of 2 would cause the return of plural units. +# @param quantity the numerical quantity +# @param unitSingular the string for singular units +# @param unitSingular the string for plural units +# @return string of singular or plural units +def units( + quantity = None, + unitSingular = "unit", + unitPlural = "units" + ): + if quantity == 1: + return unitSingular + else: + return unitPlural + + +## @brief Job: a set of pieces of information relevant to a given work function +# @detail A Job object is a set of pieces of information relevant to a given +# work function. A Job object comprises a name, a work function, work function +# arguments, the work function timeout specification, a +# multiprocessing.Pool.apply_async() object and, ultimately, a result object. +# @param name the Job object name +# @param workFunction the work function object +# @param workFunctionArguments the work function keyword arguments dictionary +# @param workFunctionTimeout the work function timeout specification in seconds +class Job(object): + + ## @brief initialisation method + def __init__( + self, + workFunction = None, + workFunctionKeywordArguments = {}, + workFunctionTimeout = None, + name = None, + ): + self.workFunction = workFunction + self.workFunctionKeywordArguments = workFunctionKeywordArguments + self.workFunctionTimeout = workFunctionTimeout + self.className = self.__class__.__name__ + self.resultGetter = None + if name == None: + self._name = uniqueIdentifier() + else: + self._name = name + if self.workFunction == None: + exceptionMessage = "work function not specified" + msg.error("{notifier}: exception message: {exceptionMessage}".format( + notifier = self.className, + exceptionMessage = exceptionMessage + )) + raise trfExceptions.TransformInternalException( + trfExit.nameToCode('TRF_INTERNAL'), + exceptionMessage + ) + + @property + def name(self): + return self._name + + ## @brief return an object self description string + # @ detail This method returns an object description string consisting of + # a listing of the items of the object self. + # @return object description string + def __str__(self): + descriptionString = "" + for key, value in sorted(vars(self).items()): + descriptionString += str("{key}:{value} ".format( + key = key, + value = value) + ) + return descriptionString + + ## @brief print in a human-readable way the items of the object self + # @detail This function prints in a human-readable way the items of the + # object self. + def printout(self): + printHR(vars(self)) + + +## @brief JobGroup: a set of Job objects and pieces of information relevant to a +# given set of Job objects +# @detail A JobGroup is a set of Job objects and pieces of information relevant +# to a given set of Job objects. A JobGroup object comprises a name, a list of +# Job objects, a timeout and, ultimately, an ordered list of result objects. +# The timeout can be speecified or derived from the summation of the timeout +# specifications of the set of Job objects. 
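+# @par Example (a minimal sketch; myWorkFunction is a hypothetical callable):
+# @code
+# jobs = [Job(workFunction=myWorkFunction,
+#             workFunctionKeywordArguments={'x': i},
+#             workFunctionTimeout=60) for i in range(4)]
+# group = JobGroup(jobs=jobs, name='exampleGroup')
+# @endcode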
+# @param name the JobGroup object name +# @param jobs the list of Job objects +# @param timeout the JobGroup object timeout specification in seconds +class JobGroup(object): + + ## @brief initialisation method + def __init__( + self, + jobs = None, + name = None, + timeout = None + ): + self.jobs = jobs + self.className = self.__class__.__name__ + self.completeStatus = False + self.timeStampSubmission = None + if name == None: + self._name = uniqueIdentifier() + else: + self._name = name + #self.timeStampSubmissionComplete = None #delete + if timeout == None: + self.timeout = 0 + for job in self.jobs: + self.timeout += job.workFunctionTimeout + self.results = [] + + @property + def name(self): + return self._name + + ## @brief return an object self description string + # @ detail This method returns an object description string consisting of + # a listing of the items of the object self. + # @return object description string + def __str__(self): + descriptionString = "" + for key, value in sorted(vars(self).items()): + descriptionString += str("{key}:{value} ".format( + key = key, + value = value) + ) + return descriptionString + + ## @brief return Boolean JobGroup timeout status + # @detail This method returns the timeout status of a JobGroup object. If + # the JobGroup object has not timed out, the Boolean False is returned. If + # the JobGroup object has timed out, the Boolean True is returned. If the + # JobGroup object has been completed or is not submitted, the Boolean False + # is returned. + # @return Boolean indicating the JobGroup timeout status + def timeoutStatus(self): + # If the JobGroup is complete or not submitted, then it is not timed + # out. + if self.completeStatus is True or self.timeStampSubmission is None: + return False + # If the JobGroup is not complete or submitted, then it may be timed + # out. + elif time.time() > self.timeout + self.timeStampSubmission: + return True + else: + return False + + ## @brief print in a human-readable way the items of the object self + # @detail This function prints in a human-readable way the items of the + # object self. + def printout(self): + printHR(vars(self)) + + +## @brief initisation procedure for processes of process pool +def initialise_processes(): + # Multiprocessing uses signals to communicate with subprocesses, so the + # following two lines prevent the transforms signal handlers from + # interfering: + from PyJobTransforms.trfSignal import resetTrfSignalHandlers + resetTrfSignalHandlers() + signal.signal(signal.SIGINT, signal.SIG_IGN) + + +## @brief ParallelJobProcessor: a multiple-process processor of Job objects +# @param jobSubmission Job object or JobGroup object for submission +# @param numberOfProcesses the number of processes in the process pool +class ParallelJobProcessor(object): + + ## @brief initialisation method that accepts submissions and starts pool + # @detail This method is the initialisation method of the parallel job + # processor. It accepts input JobGroup object submissions and prepares a + # pool of workers. 
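+    # @par Example (an illustrative sketch, reusing the hypothetical job group built above):
+    # @code
+    # pjp = ParallelJobProcessor()
+    # pjp.submit(jobSubmission=group)
+    # results = pjp.getResults()
+    # @endcode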
+ def __init__( + self, + jobSubmission = None, + numberOfProcesses = multiprocessing.cpu_count(), + ): + self.jobSubmission = jobSubmission + self.numberOfProcesses = numberOfProcesses + self.className = self.__class__.__name__ + self.status = "starting" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status) + ) + self.countOfJobs = None + self.countOfRemainingJobs = 0 + self.pool = multiprocessing.Pool( + self.numberOfProcesses, + initialise_processes + ) + msg.debug("{notifier}: pool of {numberOfProcesses} {units} created".format( + notifier = self.className, + numberOfProcesses = str(self.numberOfProcesses), + units = units(quantity = self.numberOfProcesses, + unitSingular = "process", unitPlural = "processes") + )) + self.status = "ready" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + + ## @brief return an object self-description string + # @detail This method returns an object description string consisting of + # a listing of the items of the object self. + # @return object description string + def __str__(self): + descriptionString = "" + for key, value in sorted(vars(self).items()): + descriptionString += str("{key}:{value} ".format( + key = key, + value = value + )) + return descriptionString + + ## @brief print in a human-readable way the items of the object self + # @detail This function prints in a human-readable way the items of the + # object self. + def printout(self): + printHR(vars(self) + ) + + ## @brief submit a Job object or a JobGroup object for processing + # @detail This method submits a specified Job object or JobGroup object + # for processing. On successful submission, it returns the value 0. + # @param jobSubmission Job object or JobGroup object for submission + def submit( + self, + jobSubmission = None + ): + # If the input submission is not None, then update the jobSubmission + # data attribute to that specified for this method. + if jobSubmission != None: + self.jobSubmission = jobSubmission + self.status = "submitted" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + # If the input submission is a Job object, contain it in a JobGroup + # object. + if isinstance(self.jobSubmission, Job): + jobGroup = JobGroup( + jobs = [self.jobSubmission,], + ) + self.jobSubmission = jobGroup + # Count the number of jobs. + self.countOfJobs = len(self.jobSubmission.jobs) + self.countOfRemainingJobs = self.countOfJobs + # Build a contemporary list of the names of jobs. + self.listOfNamesOfRemainingJobs = [] + for job in self.jobSubmission.jobs: + self.listOfNamesOfRemainingJobs.append(job.name) + msg.debug("{notifier}: received job group submission '{name}' of {countOfJobs} {units}".format( + notifier = self.className, + name = self.jobSubmission.name, + countOfJobs = self.countOfJobs, + units = units( + quantity = self.countOfRemainingJobs, + unitSingular = "job", + unitPlural = "jobs" + ) + )) + msg.debug(self.statusReport()) + msg.debug("{notifier}: submitting job group submission '{name}' to pool".format( + notifier = self.className, + name = self.jobSubmission.name + )) + # Cycle through all jobs in the input submission and apply each to the + # pool. 
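+        # (Each pool.apply_async() call below returns a multiprocessing.pool.AsyncResult
+        # object; it is kept on the job as resultGetter and polled later in getResults().)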
+ for job in self.jobSubmission.jobs: + job.timeStampSubmission = time.time() + msg.debug("{notifier}: job '{name}' submitted to pool".format( + notifier = self.className, + name = job.name + )) + # Apply the job to the pool, applying the object pool.ApplyResult + # to the job as a data attribute. + job.resultGetter = self.pool.apply_async( + func = job.workFunction, + kwds = job.workFunctionKeywordArguments + ) + # Prepare monitoring of job group times in order to detect a job group + # timeout by recording the time of complete submission of the job group. + self.jobSubmission.timeStampSubmission = time.time() + msg.debug("{notifier}: job group submission complete: {countOfJobs} {units} submitted to pool (timestamp: {timeStampSubmission})".format( + notifier = self.className, + countOfJobs = self.countOfJobs, + units = units( + quantity = self.countOfJobs, + unitSingular = "job", + unitPlural = "jobs" + ), + timeStampSubmission = self.jobSubmission.timeStampSubmission + )) + self.status = "processing" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + return 0 + + ## @brief get results of JobGroup object submission + # @detail This method returns an ordered list of results for jobs + # submitted. + # @return order list of results for jobs + def getResults(self): + # While the number of jobs remaining is greater than zero, cycle over + # all jobs in the JobGroup object submission submission, watching for a + # timeout of the JobGroup object submission. If a result has not been + # retrived for a job (i.e. the Job object does not have a result data + # attribute), then check if a result is available for the job (using the + # method multiprocessing.pool.AsyncResult.ready()). If a result is + # available for the job, then check if the job has run successfully + # (using the method multiprocessing.pool.AsyncResult.successful()). If + # the job has not been successful, raise an exception, otherwise, get + # the result of the job and save it to the result data attribute of the + # job. + msg.debug("{notifier}: checking for job {units}".format( + notifier = self.className, + units = units( + quantity = self.countOfRemainingJobs, + unitSingular = "result", + unitPlural = "results") + ) + ) + while self.countOfRemainingJobs > 0: + # Check for timeout of the job group. If the current timestamp is + # greater than the job group timeout (derived from the sum of the + # set of all job timeout specifications in the job group) + the job + # group submission timestamp, then raise an excepton, otherwise + # cycle over all jobs. + # Allow time for jobs to complete. + time.sleep(0.25) + if self.jobSubmission.timeoutStatus(): + msg.error("{notifier}: job group '{name}' timed out".format( + notifier = self.className, + name = self.jobSubmission.name + )) + self._abort() + exceptionMessage = "timeout of a function in list {listOfNamesOfRemainingJobs}".format( + listOfNamesOfRemainingJobs = self.listOfNamesOfRemainingJobs + ) + msg.error("{notifier}: exception message: {exceptionMessage}".format( + notifier = self.className, + exceptionMessage = exceptionMessage + )) + raise trfExceptions.TransformTimeoutException( + trfExit.nameToCode('TRF_EXEC_TIMEOUT'), + exceptionMessage + ) + else: + for job in self.jobSubmission.jobs: + self.listOfNamesOfRemainingJobs = [] + if not hasattr(job, 'result'): + # Maintain a contemporary list of the names of remaining + # jobs. + self.listOfNamesOfRemainingJobs.append(job.name) + # If the result of the job is ready... 
+ if job.resultGetter.ready(): + msg.debug( + "{notifier}: result ready for job '{name}'".format( + notifier = self.className, + name = job.name + ) + ) + job.successStatus = job.resultGetter.successful() + msg.debug( + "{notifier}: job '{name}' success status: {successStatus}".format( + notifier = self.className, + name = job.name, + successStatus = job.successStatus + ) + ) + # If the job was successful, create the result data + # attribute of the job and save the result to it. + if job.successStatus: + job.result = job.resultGetter.get() + msg.debug( + "{notifier}: result of job '{name}': {result}".format( + notifier = self.className, + name = job.name, + result = job.result + ) + ) + self.countOfRemainingJobs -= 1 + msg.debug( + "{notifier}: {countOfRemainingJobs} {units} remaining".format( + notifier = self.className, + countOfRemainingJobs = self.countOfRemainingJobs, + units = units( + quantity = self.countOfRemainingJobs, + unitSingular = "job", + unitPlural = "jobs" + ) + ) + ) + # If the job was not successful, raise an exception + # and abort processing. + elif not job.successStatus: + msg.error( + "{notifier}: job '{name}' failed".format( + notifier = self.className, + name = job.name + ) + ) + self._abort() + exceptionMessage = "failure of function '{name}' with arguments {arguments}".format( + name = job.workFunction.__name__, + arguments = job.workFunctionKeywordArguments + ) + msg.error("{notifier}: exception message: {exceptionMessage}".format( + notifier = self.className, + exceptionMessage = exceptionMessage + )) + raise trfExceptions.TransformExecutionException( + trfExit.nameToCode('TRF_EXEC_FAIL'), + exceptionMessage + ) + # All results having been returned, create the 'results' list data + # attribute of the job group and append all individual job results to + # it. + self.jobSubmission.timeStampComplete = time.time() + self.jobSubmission.completeStatus = True + msg.debug("{notifier}: all {countOfJobs} {units} complete (timestamp: {timeStampComplete})".format( + notifier = self.className, + countOfJobs = self.countOfJobs, + units = units( + quantity = self.countOfJobs, + unitSingular = "job", + unitPlural = "jobs" + ), + timeStampComplete = self.jobSubmission.timeStampComplete + )) + self.jobSubmission.processingTime = self.jobSubmission.timeStampComplete - self.jobSubmission.timeStampSubmission + msg.debug("{notifier}: time taken to process all {units}: {processingTime}".format( + notifier = self.className, + countOfJobs = self.countOfJobs, + units = units( + quantity = self.countOfJobs, + unitSingular = "job", + unitPlural = "jobs" + ), + processingTime = self.jobSubmission.processingTime + )) + for job in self.jobSubmission.jobs: + self.jobSubmission.results.append(job.result) + self._terminate() + return self.jobSubmission.results + self._terminate() + + ## @brief return a status report string + # @detail This method returns a status report string, detailing + # information on the JobGroup submission and on the job processing status. 
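+    # @par Example (abridged, purely illustrative output):
+    # @code
+    # ParallelJobProcessor:
+    #    status report:
+    #       parallel job processor configuration:
+    #          status: processing
+    #          number of processes: 4
+    # @endcode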
+ # @return status report string + def statusReport(self): + statusReport = "\n{notifier}:\n status report:".format( + notifier = self.className + ) + # information on parallel job processor + statusReport += "\n parallel job processor configuration:" + statusReport += "\n status: {notifier}".format( + notifier = str(self.status) + ) + statusReport += "\n number of processes: {notifier}".format( + notifier = str(self.numberOfProcesses) + ) + # information on job group submission + statusReport += "\n job group submission: '{notifier}'".format( + notifier = self.jobSubmission.name + ) + statusReport += "\n total number of jobs: {notifier}".format( + notifier = str(self.countOfJobs) + ) + statusReport += "\n number of incomplete jobs: {notifier}".format( + notifier = str(self.countOfRemainingJobs) + ) + statusReport += "\n names of incomplete jobs: {notifier}".format( + notifier = self.listOfNamesOfRemainingJobs + ) + # information on jobs (if existent) + if self.jobSubmission.jobs: + statusReport += "\n jobs:" + for job in self.jobSubmission.jobs: + statusReport += "\n job '{name}':".format( + name = job.name + ) + statusReport += "\n workFunction: '{name}'".format( + name = job.workFunction.__name__ + ) + statusReport += "\n workFunctionKeywordArguments: '{arguments}'".format( + arguments = job.workFunctionKeywordArguments + ) + statusReport += "\n workFunctionTimeout: '{timeout}'".format( + timeout = job.workFunctionTimeout + ) + if hasattr(job, 'result'): + statusReport += "\n result: '{result}'".format( + result = job.result + ) + # statistics of parallel job processor run + if hasattr(self.jobSubmission, 'processingTime'): + statusReport += "\n statistics:" + if hasattr(self.jobSubmission, 'processingTime'): + statusReport += "\n total processing time: {processingTime} s".format( + processingTime = self.jobSubmission.processingTime + ) + return statusReport + + ## @brief abort parallel job processor + # @detail This method aborts the parallel job processor. It is used + # typically when an exception is raised. + def _abort(self): + self.status = "aborting" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + self._terminate() + + ## @brief terminate parallel job processor + # @detail This method terminates the parallel job processor. It terminates + # the subprocesses of the parallel job processor. It is used typically + # when terminating the parallel job processor on successful completion of + # job processing and when aborting the parallel job processor. 
+ def _terminate(self): + self.status = "terminating" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + msg.debug("{notifier}: terminating pool of {numberOfProcesses} {units}".format( + notifier = self.className, + numberOfProcesses = str(self.numberOfProcesses), + units = units( + quantity = self.numberOfProcesses, + unitSingular = "process", + unitPlural = "processes" + ) + )) + self.pool.terminate() + self.pool.join() + self.status = "finished" + msg.debug("{notifier}: status: {status}".format( + notifier = self.className, + status = self.status + )) + msg.debug(self.statusReport()) diff --git a/Tools/PyJobTransforms/python/trfValidateRootFile.py b/Tools/PyJobTransforms/python/trfValidateRootFile.py new file mode 100755 index 0000000000000000000000000000000000000000..2a8ea7b854a3e5b30c5d9e944bee7ee2fa997e95 --- /dev/null +++ b/Tools/PyJobTransforms/python/trfValidateRootFile.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package PyJobTransforms.trfValidateRootFile +# @brief Functionality to test a Root file for corruption +# @author atlas-comp-transforms-dev@cern.ch +# @todo The main() CLI should migrate to @c scripts and this module just implement functions + + + +import sys +import logging + +from PyUtils import RootUtils +ROOT = RootUtils.import_root() +from ROOT import TFile, TTree, TKey, TDirectory, TClass, TList, TObjArray, TStopwatch, TBasket + +msg = logging.getLogger(__name__) + +def checkBranch(branch): + + msg.debug('Checking branch %s...' % branch.GetName()) + + nBaskets=branch.GetWriteBasket(); + + msg.debug('Checking %s baskets...' % nBaskets) + + for iBasket in range(nBaskets): + basket=branch.GetBasket(iBasket); + if not basket: + msg.warning('Basket %s of branch %s is corrupted.' % (iBasket, branch.GetName() )) + return 1 + + listOfSubBranches=branch.GetListOfBranches(); + msg.debug('Checking %s subbranches...' % listOfSubBranches.GetEntries()) + for subBranch in listOfSubBranches: + if checkBranch(subBranch)==1: + return 1; + + msg.debug('Branch %s looks ok.' % branch.GetName()) + return 0 + + +def checkTreeBasketWise(tree): + + listOfBranches=tree.GetListOfBranches() + + msg.debug('Checking %s branches ...' % listOfBranches.GetEntries()) + + for branch in listOfBranches: + if checkBranch(branch)==1: + msg.warning('Tree %s is corrupted (branch %s ).' % (tree.GetName(), branch.GetName())) + return 1 + + return 0 + + +def checkTreeEventWise(tree): + + nEntries=tree.GetEntries() + + msg.debug('Checking %s entries...' % nEntries) + + for i in range(nEntries): + if tree.GetEntry(i)<0: + msg.warning('Event %s of tree %s is corrupted.' % (i, tree.GetName())) + return 1 + + return 0 + + +def checkDirectory(directory, type, requireTree): + + msg.debug('Checking directory %s...' % directory.GetName()) + + listOfKeys=directory.GetListOfKeys() + + msg.debug('Checking %s keys... ' % listOfKeys.GetEntries()) + + for key in listOfKeys: + + msg.debug('Looking at key %s...' % key.GetName()) + msg.debug('Key is of class %s.' % key.GetClassName()) + + object=directory.Get(key.GetName()) + if not object: + msg.warning("Can't get object of key %s." % key.GetName()) + return 1 + + if requireTree and not isinstance(object, TTree): + msg.warning("Object %s is not of class TTree!" % object.GetName()) + return 1 + + if isinstance(object,TTree): + + msg.debug('Checking tree %s ...' 
% object.GetName()) + + if type=='event': + if checkTreeEventWise(object)==1: + return 1 + elif type=='basket': + if checkTreeBasketWise(object)==1: + return 1 + + msg.debug('Tree %s looks ok.' % object.GetName()) + + if isinstance(object, TDirectory): + if checkDirectory(object, type, requireTree)==1: + return 1 + + msg.debug('Directory %s looks ok.' % directory.GetName()) + return 0 + + +def checkFile(fileName, type, requireTree): + + msg.info('Checking file %s.' % fileName) + + file=TFile.Open(fileName) + + if not file: + msg.warning("Can't access file %s." % fileName) + return 1 + + if not file.IsOpen(): + msg.warning("Can't open file %s." % fileName) + return 1 + + if file.IsZombie(): + msg.warning("File %s is a zombie." % fileName) + file.Close() + return 1 + + if file.TestBit(TFile.kRecovered): + msg.warning("File %s needed to be recovered." % fileName) + file.Close() + return 1 + + if checkDirectory(file, type, requireTree)==1: + msg.warning("File %s is corrupted." % fileName) + file.Close() + return 1 + + file.Close(); + msg.info("File %s looks ok." % fileName) + return 0 + + +def usage(): + print "Usage: validate filename type requireTree verbosity" + print "'type' must be either 'event' or 'basket'" + print "'requireTree' must be either 'true' or 'false'" + print "'verbosity' must be either 'on' or 'off'" + + return 2 + + +def main(argv): + + clock=TStopwatch() + + argc=len(argv) + + if (argc!=5): + return usage() + + fileName=argv[1] + type=argv[2] + requireTree=argv[3] + verbosity=argv[4] + + + if type!="event" and type!="basket": + return usage() + + if requireTree!="true" and requireTree!="false": + return usage() + + if verbosity=="on": + msg.setLevel(logging.DEBUG) + elif verbosity=="off": + msg.setLevel(logging.INFO) + else: + return usage() + + rc=checkFile(fileName,type, requireTree) + msg.debug('Returning %s' % rc) + + clock.Stop(); + clock.Print(); + + return rc + + +if __name__ == '__main__': + + ch=logging.StreamHandler(sys.stdout) +# ch.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + msg.addHandler(ch) + + rc=main(sys.argv) + sys.exit(rc) + diff --git a/Tools/PyJobTransforms/python/trfValidation.py b/Tools/PyJobTransforms/python/trfValidation.py new file mode 100644 index 0000000000000000000000000000000000000000..9da2cef73ffb907f95f36b2b3e46ffe9df601bcc --- /dev/null +++ b/Tools/PyJobTransforms/python/trfValidation.py @@ -0,0 +1,857 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfValidation +# +# @brief Validation control for job transforms +# @details Contains validation classes controlling how the transforms +# will validate jobs they run. 
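+# @par Example (an illustrative call to one of the checkers defined below; the
+# filename is hypothetical):
+# @code
+# nEvents = corruptionTestPool('myESD.pool.root', verbose=True)
+# if nEvents < 0:
+#     msg.error('File failed the corruption test')
+# @endcode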
+# @author atlas-comp-transforms-dev@cern.ch
+# @version $Id: trfValidation.py 614626 2014-09-02 14:22:48Z volkmer $
+# @note Old validation dictionary shows usefully different options:
+# <tt>self.validationOptions = {'testIfEmpty' : True, 'testIfNoEvents' : False, 'testIfExists' : True,
+#                               'testIfCorrupt' : True, 'testCountEvents' : True, 'extraValidation' : False,
+#                               'testMatchEvents' : False, 'testEventMinMax' : True , 'stopOnEventCountNone' : True,
+#                               'continueOnZeroEventCount' : True}</tt>
+import inspect
+import fnmatch
+import os
+import re
+import sys
+import unittest
+
+from subprocess import Popen, STDOUT, PIPE
+from xml.etree import ElementTree
+
+import logging
+msg = logging.getLogger(__name__)
+
+from PyUtils import RootUtils
+
+from PyJobTransforms.trfExitCodes import trfExit
+from PyJobTransforms.trfLogger import stdLogLevels, stdLogLevelsByCritcality
+from PyJobTransforms.trfArgClasses import argFile
+
+import PyJobTransforms.trfExceptions as trfExceptions
+import PyJobTransforms.trfUtils as trfUtils
+
+
+# @brief Check a Pool file for corruption, return N events or -1 if access problem, -2 if corruption
+def corruptionTestPool(filename, verbose=False):
+    if not os.access(filename, os.R_OK):
+        msg.info("ERROR can't access file %s" % filename)
+        return -1
+
+    ROOT = RootUtils.import_root()
+    from ROOT import TFile, TTree
+    import PyCintex
+
+    try:
+        f = TFile.Open(filename)
+    except:
+        msg.info("Can't open file %s" % filename)
+        return -1
+
+    nEvents = None
+
+    keys = f.GetListOfKeys()
+    for k in keys:
+        try:
+            tn = k.GetName()
+            t = f.Get(tn)
+            if not isinstance(t, TTree): continue
+        except:
+            msg.info("Can't get tree %s from file %s" % (tn, filename))
+            f.Close()
+            return -1
+
+        if (verbose): msg.info("Working on tree %s" % tn)
+        n = t.GetEntriesFast()
+        for i in range(n):
+            s = t.GetEntry(i)
+            if s <= 0:
+                msg.info("Tree %s: Found corruption in event %i" % (tn, i))
+                f.Close()
+                return -2
+            else:
+                if verbose and i > 0 and i % 100 == 0:
+                    msg.info("Checking event %s" % i)
+        msg.info("Tree %s: %i event(s) ok" % (tn, n))
+
+        # Use CollectionTree to determine the number of events
+        if tn == 'CollectionTree':
+            nEvents = n
+        pass # end of loop over trees
+
+    f.Close()
+    msg.info("ROOT file %s looks ok" % filename)
+    if nEvents is None:
+        msg.info("Failed to determine number of events in file %s. No tree named 'CollectionTree'" % filename)
+        return 0
+    return nEvents
+
+# @brief Check BS file for corruption
+def corruptionTestBS(filename):
+    # First try AtlListBSEvents.exe -c %filename:
+    cmd = ['AtlListBSEvents.exe', '-c', filename]
+    p = Popen(cmd, shell=False, stdout=PIPE, stderr=STDOUT, close_fds=True)
+    while p.poll() is None:
+        line = p.stdout.readline()
+        if line:
+            msg.info("AtlListBSEvents.exe Report: %s" % line.strip())
+    rc = p.returncode
+    return rc
+
+
+## @brief Class of patterns that can be ignored from athena logfiles
+class ignorePatterns(object):
+
+    ## @brief Load error patterns from files
+    # @details Load regular expressions to be used in logfile parsing
+    # @param files Files to load up structured error patterns from
+    # @param extraSearch Extra regexp strings to @a search against
+    def __init__(self, files=['atlas_error_mask.db'], extraSearch = []):
+        # Setup structured search patterns
+        self._structuredPatterns = []
+        self._initalisePatterns(files)
+
+        # Setup extra search patterns
+        self._searchPatterns = []
+        self._initialiseSerches(extraSearch)
+
+    @property
+    def structuredPatterns(self):
+        return self._structuredPatterns
+
+    @property
+    def searchPatterns(self):
+        return self._searchPatterns
+
+    def _initalisePatterns(self, files):
+        for patternFile in files:
+            if patternFile == "None":
+                continue
+            fullName = trfUtils.findFile(os.environ['DATAPATH'], patternFile)
+            if not fullName:
+                msg.warning('Error pattern file {0} could not be found in DATAPATH'.format(patternFile))
+                continue
+            try:
+                with open(fullName) as patternFileHandle:
+                    msg.debug('Opened error file {0} from here: {1}'.format(patternFile, fullName))
+
+                    for line in patternFileHandle:
+                        line = line.strip()
+                        if line.startswith('#') or line == '':
+                            continue
+                        try:
+                            # N.B. At the moment release matching is not supported!
+                            (who, level, message) = [ s.strip() for s in line.split(',', 2) ]
+                            if who == "":
+                                # Blank means match anything, so make it so...
+                                who = "."
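+                            # For illustration, a pattern line has the comma-separated
+                            # form "who, level, message"; a hypothetical entry:
+                            #   ToolSvc.SomeTool, ERROR, .*known harmless message.*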
+                            reWho = re.compile(who)
+                            reLevel = level # level is not a regexp (for now)
+                            reMessage = re.compile(message)
+                        except ValueError:
+                            msg.warning('Could not parse this line as a valid error pattern: {0}'.format(line))
+                            continue
+                        except re.error, e:
+                            msg.warning('Could not parse valid regexp from {0}: {1}'.format(message, e))
+                            continue
+
+                        msg.debug('Successfully parsed: who={0}, level={1}, message={2}'.format(who, level, message))
+
+                        self._structuredPatterns.append({'service': reWho, 'level': level, 'message': reMessage})
+
+            except (IOError, OSError) as (errno, errMsg):
+                msg.warning('Failed to open error pattern file %s: %s' % (fullName, errMsg))
+
+
+    def _initialiseSerches(self, searchStrings=[]):
+        for string in searchStrings:
+            try:
+                self._searchPatterns.append(re.compile(string))
+                msg.debug('Successfully parsed additional logfile search string: {0}'.format(string))
+            except re.error, e:
+                msg.warning('Could not parse valid regexp from {0}: {1}'.format(string, e))
+
+
+
+## @brief A class holding report information from scanning a logfile
+# This is pretty much a virtual class, fill in the specific methods
+# when you know what type of logfile you are dealing with
class logFileReport(object):
+    def __init__(self, logfile=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR']):
+
+        # We can have one logfile or a set
+        if isinstance(logfile, str):
+            self._logfile = [logfile, ]
+        else:
+            self._logfile = logfile
+
+        self._msgLimit = msgLimit
+        self._msgDetails = msgDetailLevel
+        self._re = None
+
+        if logfile:
+            self.scanLogFile()
+
+    def resetReport(self):
+        pass
+
+    def scanLogFile(self):
+        pass
+
+    def worstError(self):
+        pass
+
+    def firstError(self):
+        pass
+
+    def __str__(self):
+        return ''
+
+
+## @class athenaLogFileReport
+# @brief Logfile suitable for scanning logfiles with an athena flavour, i.e.,
+# lines of the form "SERVICE LOGLEVEL MESSAGE"
+class athenaLogFileReport(logFileReport):
+    ## @brief Class constructor
+    # @param logfile Logfile (or list of logfiles) to scan
+    # @param msgLimit The number of messages in each category on which a detailed report will be kept
+    def __init__(self, logfile, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'], ignoreList=None):
+        if ignoreList:
+            self._ignoreList = ignoreList
+        else:
+            self._ignoreList = ignorePatterns()
+
+        ## @note This is the regular expression match for athena logfile lines
+        # Match first strips off any HH:MM:SS prefix the transform has added, then
+        # takes the next group of non-whitespace characters as the service, then
+        # matches from the list of known levels and, finally, ignores any last
+        # pieces of whitespace prefix and takes the rest of the line as the message
+        self._regExp = re.compile(r'(?P<prefix>[\d\:]+)?\s+(?P<service>[^\s]+)\s+(?P<level>' + '|'.join(stdLogLevels) + r')\s+(?P<message>.*)')
+
+        self._metaPat = re.compile(r"^.*\sMetaData:\s+(.*?)\s*=\s*(.*)$")
+        self._metaData = {}
+
+        self.resetReport()
+
+        super(athenaLogFileReport, self).__init__(logfile, msgLimit, msgDetailLevel)
+
+    ## Produce a python dictionary summary of the log file report for inclusion
+    # in the executor report
+    @property
+    def python(self):
+        errorDict = {'countSummary': {}, 'details': {}}
+        for level, count in self._levelCounter.iteritems():
+            errorDict['countSummary'][level] = count
+            if self._levelCounter[level] > 0 and len(self._errorDetails[level]) > 0:
+                errorDict['details'][level] = []
+                for error in self._errorDetails[level]:
+                    errorDict['details'][level].append(error)
+        return errorDict
+
+    def resetReport(self):
self._levelCounter = {} + for level in stdLogLevels.keys() + ['UNKNOWN', 'IGNORED']: + self._levelCounter[level] = 0 + + self._errorDetails = {} + for level in self._levelCounter.keys(): + self._errorDetails[level] = [] + # Format: + # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N} + + + def scanLogFile(self, resetReport=False): + if resetReport: + self.resetReport() + + for log in self._logfile: + msg.debug('Now scanning logfile {0}'.format(log)) + # N.B. Use the generator so that lines can be grabbed by subroutines, e.g., core dump svc reporter + try: + myGen = trfUtils.lineByLine(log) + except IOError, e: + msg.error('Failed to open transform logfile {0}: {1:s}'.format(log, e)) + # Return this as a small report (keep the list-of-dicts format) + self._levelCounter['ERROR'] = 1 + self._errorDetails['ERROR'] = [{'message': str(e), 'firstLine': 0, 'count': 1}] + return + for line, lineCounter in myGen: + m = self._metaPat.search(line) + if m is not None: + key, value = m.groups() + self._metaData[key] = value + + m = self._regExp.match(line) + if m is None: + # We didn't manage to get a recognised standard line from the file + # But we can check for certain other interesting things, like core dumps + if 'Core dump from CoreDumpSvc' in line: + msg.warning('Detected CoreDumpSvc report - activating core dump svc grabber') + self.coreDumpSvcParser(myGen, line, lineCounter) + continue + # Add the G4 exception parsers + if 'G4Exception-START' in line: + msg.warning('Detected G4 exception report - activating G4 exception grabber') + self.g4ExceptionParser(myGen, line, lineCounter) + continue + if '*** G4Exception' in line: + msg.warning('Detected G4 9.4 exception report - activating G4 exception grabber') + self.g494ExceptionParser(myGen, line, lineCounter) + continue + # Add the python exception parser + if 'Shortened traceback (most recent user call last)' in line: + msg.warning('Detected python exception - activating python exception grabber') + self.pythonExceptionParser(myGen, line, lineCounter) + continue + msg.debug('Non-standard line in %s: %s' % (log, line)) + self._levelCounter['UNKNOWN'] += 1 + continue + + # Line was matched successfully + fields = {} + for matchKey in ('service', 'level', 'message'): + fields[matchKey] = m.group(matchKey) + msg.debug('Line parsed as: {0}'.format(fields)) + + # Check this is not in our ignore list + ignoreFlag = False + for ignorePat in self._ignoreList.structuredPatterns: + serviceMatch = ignorePat['service'].match(fields['service']) + levelMatch = (ignorePat['level'] == "" or ignorePat['level'] == fields['level']) + messageMatch = ignorePat['message'].match(fields['message']) + if serviceMatch and levelMatch and messageMatch: + msg.info('Error message "{0}" was ignored at line {1} (structured match)'.format(line, lineCounter)) + ignoreFlag = True + break + if ignoreFlag is False: + for searchPat in self._ignoreList.searchPatterns: + if searchPat.search(line): + msg.info('Error message "{0}" was ignored at line {1} (search match)'.format(line, lineCounter)) + ignoreFlag = True + break + if ignoreFlag: + # Got an ignore - move this message to the special IGNORED level + fields['level'] = 'IGNORED' + + # Count this error + self._levelCounter[fields['level']] += 1 + + # Record some error details + # N.B.
We record 'IGNORED' errors as these really should be flagged for fixing + if fields['level'] == 'IGNORED' or stdLogLevels[fields['level']] >= self._msgDetails: + detailsHandled = False + for seenError in self._errorDetails[fields['level']]: + if seenError['message'] == line: + seenError['count'] += 1 + detailsHandled = True + break + if detailsHandled == False: + self._errorDetails[fields['level']].append({'message': line, 'firstLine': lineCounter, 'count': 1}) + + + ## Return the worst error found in the logfile (first error of the most serious type) + def worstError(self): + worst = stdLogLevels['DEBUG'] + worstName = 'DEBUG' + for lvl, count in self._levelCounter.iteritems(): + if count > 0 and stdLogLevels.get(lvl, 0) > worst: + worstName = lvl + worst = stdLogLevels[lvl] + if len(self._errorDetails[worstName]) > 0: + firstError = self._errorDetails[worstName][0] + else: + firstError = None + + return {'level': worstName, 'nLevel': worst, 'firstError': firstError} + + + ## Return the first error found in the logfile above a certain loglevel + def firstError(self, floor='ERROR'): + firstLine = firstError = None + firstLevel = stdLogLevels[floor] + firstName = floor + for lvl, count in self._levelCounter.iteritems(): + if (count > 0 and stdLogLevels.get(lvl, 0) >= stdLogLevels[floor] and + (firstError is None or self._errorDetails[lvl][0]['firstLine'] < firstLine)): + firstLine = self._errorDetails[lvl][0]['firstLine'] + firstLevel = stdLogLevels[lvl] + firstName = lvl + firstError = self._errorDetails[lvl][0] + + return {'level': firstName, 'nLevel': firstLevel, 'firstError': firstError} + + ## @brief Attempt to suck a core dump report from the current logfile + # @note: Current implementation just eats lines until a 'normal' line is seen. + # There is a slight problem here in that the end of core dump trigger line will not get parsed + # TODO: fix this (OTOH core dump is usually the very last thing and fatal!) + def coreDumpSvcParser(self, lineGenerator, firstline, firstLineCount): + coreDumpReport = firstline + for line, linecounter in lineGenerator: + m = self._regExp.match(line) + if m is None: + coreDumpReport += os.linesep + line + else: + # Can this be done - we want to push the line back into the generator to be + # reparsed in the normal way (might need to make the generator a class with the + # __exec__ method supported (to get the line), so that we can then add a + # pushback onto an internal FIFO stack +# lineGenerator.pushback(line) + break + + # Core dumps are always fatal... + msg.debug('Identified core dump - adding to error detail report') + self._levelCounter['FATAL'] += 1 + self._errorDetails['FATAL'].append({'message': coreDumpReport, 'firstLine': firstLineCount, 'count': 1}) + + def g494ExceptionParser(self, lineGenerator, firstline, firstLineCount): + g4Report = firstline + g4lines = 1 + if 'Aborting execution' not in g4Report: + for line, linecounter in lineGenerator: + g4Report += os.linesep + line + g4lines += 1 + # Test for the closing string + if '*** ' in line: + break + if g4lines >= 25: + msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount)) + break + + # G4 exceptions can be fatal or they can be warnings...
+ msg.debug('Identified G4 exception - adding to error detail report') + if "just a warning" in g4Report: + self._levelCounter['WARNING'] += 1 + self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1}) + else: + self._levelCounter['FATAL'] += 1 + self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1}) + + + def g4ExceptionParser(self, lineGenerator, firstline, firstLineCount): + g4Report = firstline + g4lines = 1 + for line, linecounter in lineGenerator: + g4Report += os.linesep + line + g4lines += 1 + # Test for the closing string + if 'G4Exception-END' in line: + break + if g4lines >= 25: + msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount)) + break + + # G4 exceptions can be fatal or they can be warnings... + msg.debug('Identified G4 exception - adding to error detail report') + if "-------- WWWW -------" in g4Report: + self._levelCounter['WARNING'] += 1 + self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1}) + else: + self._levelCounter['FATAL'] += 1 + self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1}) + + def pythonExceptionParser(self, lineGenerator, firstline, firstLineCount): + pythonExceptionReport = "" + lastLine = firstline + pythonErrorLine = firstLineCount + pyLines = 1 + for line, linecounter in lineGenerator: + if 'Py:Athena' in line and 'INFO leaving with code' in line: + pythonExceptionReport = lastLine + pythonErrorLine = linecounter-1 + break + if pyLines >= 25: + msg.warning('Could not identify python exception correctly scanning {0} log lines after line {1}'.format(pyLines, firstLineCount)) + pythonExceptionReport = "Unable to identify specific exception" + pythonErrorLine = firstLineCount + break + lastLine = line + pyLines += 1 + + msg.debug('Identified python exception - adding to error detail report') + self._levelCounter['FATAL'] += 1 + self._errorDetails['FATAL'].append({'message': pythonExceptionReport, 'firstLine': pythonErrorLine, 'count': 1}) + + + def __str__(self): + return str(self._levelCounter) + str(self._errorDetails) + + +## @brief return integrity of file using appropriate validation function +# @details This method returns the integrity of a specified file using a +# specified validation function. +def returnIntegrityOfFile(file, functionName): + try: + import PyJobTransforms.trfFileValidationFunctions as trfFileValidationFunctions + except Exception as exception: + msg.error('Failed to import module PyJobTransforms.trfFileValidationFunctions with error {error}'.format(error = exception)) + raise + validationFunction = getattr(trfFileValidationFunctions, functionName) + return validationFunction(file) + + +## @brief perform standard file validation +# @details This method performs standard file validation in either serial or +# parallel and updates file integrity metadata. +def performStandardFileValidation(dict, io, parallelMode = False): + if parallelMode == False: + msg.info('Starting legacy (serial) file validation') + for (key, arg) in dict.items(): + if not isinstance(arg, argFile): + continue + if not arg.io == io: + continue + + msg.info('Validating data type %s...' % key) + + for file in arg.value: + msg.info('Validating file %s...'
% file) + + if io == "output": + msg.info('{0}: Testing corruption...'.format(file)) + if arg.getSingleMetadata(file, 'integrity') is True: + msg.info('Corruption test passed.') + elif arg.getSingleMetadata(file, 'integrity') is False: + msg.error('Corruption test failed.') + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % file) + elif arg.getSingleMetadata(file, 'integrity') == 'UNDEFINED': + msg.info('No corruption test defined.') + else: + msg.error('Unknown rc from corruption test.') + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % file) + + + msg.info('{0}: Testing event count...'.format(file)) + if arg.getSingleMetadata(file, 'nentries') is not None: + msg.info('Event counting test passed ({0!s} events).'.format(arg.getSingleMetadata(file, 'nentries'))) + else: + msg.error('Event counting test failed.') + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass event count test' % file) + + + msg.info('{0}: Checking if guid exists...'.format(file)) + if arg.getSingleMetadata(file, 'file_guid') is None: + msg.error('Guid could not be determined.') + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass guid test' % file) + elif arg.getSingleMetadata(file, 'file_guid') == 'UNDEFINED': + msg.info('Guid not defined.') + else: + msg.info('Guid is %s' % arg.getSingleMetadata(file, 'file_guid')) + msg.info('Stopping legacy (serial) file validation') + if parallelMode == True: + msg.info('Starting parallel file validation') + from PyJobTransforms.trfValidateRootFile import checkFile + # Create lists of files and args. These lists are to be used with zip in + # order to check and update file integrity metadata as appropriate. + fileList = [] + argList = [] + # Create a list of the integrity functions for files. + integrityFunctionList = [] + # Create a list for collation of file validation jobs for submission to + # the parallel job processor. + jobs = [] + for (key, arg) in dict.items(): + if not isinstance(arg, argFile): + continue + if not arg.io == io: + continue + msg.debug('Collating list of files for validation') + for file in arg.value: + msg.debug('Appending file {fileName} to list of files for validation'.format(fileName = str(file))) + # Append the current file to the file list. + fileList.append(file) + # Append the current arg to the arg list. + argList.append(arg) + # Append the current integrity function name to the integrity + # function list if it exists. If it does not exist, raise an + # exception. + if arg.integrityFunction: + integrityFunctionList.append(arg.integrityFunction) + else: + msg.error('Validation function for file {fileName} not available for parallel file validation'.format(fileName = str(file))) + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'Validation function for file %s not available for parallel file validation' % str(file)) + # Compose a job for validation of the current file using the + # appropriate validation function, which is derived from the + # associated data attribute arg.integrityFunction.
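+ # For illustration: each job just defers the call returnIntegrityOfFile(file=file, + # functionName=arg.integrityFunction), with the named function resolved from + # PyJobTransforms.trfFileValidationFunctions at execution time, so that the + # parallel job processor can run it under a timeout.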
+ jobs.append( + trfUtils.Job( + name = "validation of file {fileName}".format( + fileName = str(file)), + workFunction = returnIntegrityOfFile, + workFunctionKeywordArguments = { + 'file': file, + 'functionName': arg.integrityFunction + }, + workFunctionTimeout = 600 + ) + ) + # Contain the file validation jobs in a job group for submission to the + # parallel job processor. + jobGroup1 = trfUtils.JobGroup( + name = "standard file validation", + jobs = jobs + ) + # Prepare the parallel job processor. + parallelJobProcessor1 = trfUtils.ParallelJobProcessor() + # Submit the file validation jobs to the parallel job processor. + msg.info('Submitting file validation jobs to parallel job processor') + parallelJobProcessor1.submit(jobSubmission = jobGroup1) + resultsList = parallelJobProcessor1.getResults() + msg.info('Parallel file validation complete') + # Update file metadata with integrity results using the lists fileList, + # argList and resultsList. + msg.info('Processing file integrity results') + for currentFile, currentArg, currentIntegrityFunction, currentResult in zip(fileList, argList, integrityFunctionList, resultsList): + msg.info('{IO} file {fileName} has integrity status {integrityStatus} as determined by integrity function {integrityFunction}'.format( + IO = str(io), + fileName = str(currentFile), + integrityStatus = str(currentResult), + integrityFunction = str(currentIntegrityFunction) + )) + # If the first (Boolean) element of the result tuple for the current + # file is True, update the integrity metadata. If it is False, raise + # an exception. + if currentResult[0] == True: + msg.info('Updating integrity metadata for file {fileName}'.format(fileName = str(currentFile))) + currentArg._setMetadata(files=[currentFile,], metadataKeys={'integrity': currentResult[0]}) + else: + exceptionMessage = "{IO} file validation failure on file {fileName} with integrity status {integrityStatus} as determined by integrity function {integrityFunction}".format( + IO = str(io), + fileName = str(currentFile), + integrityStatus = str(currentResult), + integrityFunction = str(currentIntegrityFunction) + ) + msg.error("exception message: {exceptionMessage}".format( + exceptionMessage = exceptionMessage + )) + if io == 'input': + exitCodeName = 'TRF_INPUT_FILE_VALIDATION_FAIL' + elif io == 'output': + exitCodeName = 'TRF_OUTPUT_FILE_VALIDATION_FAIL' + raise trfExceptions.TransformValidationException( + trfExit.nameToCode(exitCodeName), + exceptionMessage + ) + # Perform a check to determine if the file integrity metadata is + # correct. + if currentArg.getSingleMetadata(currentFile, metadataKey = 'integrity', populate = False) == currentResult[0]: + msg.debug("file integrity metadata update successful") + else: + msg.error("file integrity metadata update unsuccessful") + msg.info('Stopping parallel file validation') + + +## @brief Small class used for validating event counts between input and output files +class eventMatch(object): + + ## @brief check in- and output event counts + # @details Class to verify that in- and output event counts are in a reasonable relationship. + # @param @c executor Executor instance for this check + # @param @c eventCountConf dictionary to replace or append to default, see code for details + # @param @c eventCountConfOverwrite Replace rather than append eventCountConf (Default: False) + # Takes efficiencies into account.
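+ # Example (illustrative): eventMatch(executor, eventCountConf={'EVNT': {'HITS': 0.99}}) + # would require 0.99*n_in <= n_out <= n_in for an EVNT -> HITS step; note that the + # dict.update() below is shallow, so this replaces the whole 'EVNT' sub-dictionary.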
+ # All data is taken from _trf dict + def __init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False): + self._executor = executor + + ## @note This double dictionary is formed of INPUT data, then a dictionary of the expected + # event counts from different output data types. If there is no exact match for the output + # datatype then globbing matches are allowed. + # Thus self._eventCountConf[input][output] gives the test for input -> output. + # The dictionary recognises the following options: + # - @c match : exact match of input and output events, n_in = n_out + # - @c filter : any event count from 0 up to input events is ok, 0 <= n_out <= n_in + # - @c minEff : any event count with n_in * eventAcceptanceEfficiency <= n_out <= n_in + # - @c float in range [0,1] : same as minEff with this efficiency factor + # For any case where the output events can be less than the input ones an integer conversion + # is applied, so the result is rounded down. i.e., 1 * 0.5 -> 0. + simEventEff = 0.995 + self._eventCountConf = {} + self._eventCountConf['EVNT'] = {'EVNT_MRG':"match", "HITS": simEventEff, "EVNT_CAVERN": simEventEff, "EVNT_Stopped": simEventEff} + self._eventCountConf['EVNT_CAVERN'] = {'HITS': simEventEff} + self._eventCountConf['EVNT_COSMICS'] = {'HITS': simEventEff} + self._eventCountConf['EVNT_Stopped'] = {'HITS': simEventEff} + self._eventCountConf['HITS'] = {'RDO':"match", "HITS_MRG":"match", 'HITS_FILT': simEventEff} + self._eventCountConf['BS'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "BS_MRG":"match", 'DESD_*': "filter"} + self._eventCountConf['RDO'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "RDO_MRG":"match"} + self._eventCountConf['ESD'] = {'ESD_MRG': "match", 'AOD':"match", 'DESD_*':"filter", 'DAOD_*':"filter", 'NTUP_*':"filter"} + self._eventCountConf['AOD'] = {'AOD_MRG' : "match", 'TAG':"match", "NTUP_*":"filter", "DAOD_*":"filter"} + self._eventCountConf['AOD_MRG'] = {'TAG':"match"} + self._eventCountConf['TAG'] = {'TAG_MRG': "match"} + self._eventCountConf['HIST'] = {'HIST_MRG': "match"} + self._eventCountConf['NTUP_COMMON'] = {'DNTUP*': "filter"} + self._eventCountConf['NTUP_*'] = {'NTUP_*_MRG': "match"} + # Next one comprises special data type names for smart merging of AthenaMP worker outputs + self._eventCountConf['POOL_MRG_INPUT'] = {'POOL_MRG_OUTPUT': "match"} + + + if eventCountConf: + if eventCountConfOverwrite == True: + self._eventCountConf = eventCountConf + else: + self._eventCountConf.update(eventCountConf) + + msg.debug('Event count check configuration is: {0}'.format(self._eventCountConf)) + if hasattr(self._executor, 'name'): + msg.debug('Event count check ready for executor {0}'.format(self._executor.name)) + + if self._executor is not None: + self.configureCheck(override=False) + + ## @brief Setup the parameters needed to define particular checks + # @param override If set then configure the checks using this dictionary, which needs + # to have keys @c inEventDict, @c outEventDict, @c skipEvents, @c maxEvents, @c evAccEff + # @note Default is to configure the checks from the associated executor + def configureCheck(self, override=False): + if override: + msg.info('Overriding check configuration with: {0}'.format(override)) + self._inEventDict = override['inEventDict'] + self._outEventDict = override['outEventDict'] + self._skipEvents = override['skipEvents'] + self._maxEvents = override['maxEvents'] + self._evAccEff = override['evAccEff'] + else: + # Input data from executor +
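+ # (nentries may be a non-integer placeholder such as 'UNDEF' when the metadata + # could not be determined - an assumption based on the type checks in decide() + # below, which skip any input or output whose count is not an int/long.)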
self._inEventDict = {} + for dataTypeName in self._executor.input: + try: + self._inEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries + msg.debug('Input data type {0} has {1} events'.format(dataTypeName, self._inEventDict[dataTypeName])) + except KeyError: + msg.warning('Found no dataDictionary entry for input data type {0}'.format(dataTypeName)) + + # Output data from executor + self._outEventDict = {} + for dataTypeName in self._executor.output: + try: + self._outEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries + msg.debug('Output data type {0} has {1} events'.format(dataTypeName, self._outEventDict[dataTypeName])) + except KeyError: + msg.warning('Found no dataDictionary entry for output data type {0}'.format(dataTypeName)) + + # Find if we have a skipEvents applied + if self._executor.conf.argdict.has_key("skipEvents"): + self._skipEvents = self._executor.conf.argdict['skipEvents'].returnMyValue(exe=self._executor) + else: + self._skipEvents = None + + # Find if we have a maxEvents applied + if self._executor.conf.argdict.has_key("maxEvents"): + self._maxEvents = self._executor.conf.argdict['maxEvents'].returnMyValue(exe=self._executor) + if self._maxEvents == -1: + self._maxEvents = None + else: + self._maxEvents = None + + # Global eventAcceptanceEfficiency set? + if self._executor.conf.argdict.has_key("eventAcceptanceEfficiency"): + self._evAccEff = self._executor.conf.argdict['eventAcceptanceEfficiency'].returnMyValue(exe=self._executor) + if (self._evAccEff is None): + self._evAccEff = 0.99 + else: + self._evAccEff = 0.99 + + msg.debug("Event check conf: {0} {1}, {2}, {3}, {4}".format(self._inEventDict, self._outEventDict, self._skipEvents, + self._maxEvents, self._evAccEff)) + + + ## @brief Perform an event count check + def decide(self): + # We have all that we need to proceed: input and output data, skip and max events plus any efficiency factor + # So loop over the input and output data and make our checks + for inData, neventsInData in self._inEventDict.iteritems(): + if type(neventsInData) not in (int, long): + msg.warning('Event count metadata for {inData} was not countable, found {neventsInData}. No event checks possible for this input data.'.format(inData=inData, neventsInData=neventsInData)) + continue + if inData in self._eventCountConf: + inDataKey = inData + else: + # OK, try a glob match in this case (YMMV) + matchedInData = False + for inDataKey in self._eventCountConf.keys(): + if fnmatch.fnmatch(inData, inDataKey): + msg.info("Matched input data type {inData} to {inDataKey} by globbing".format(inData=inData, inDataKey=inDataKey)) + matchedInData = True + break + if not matchedInData: + msg.warning('No defined event count match for {inData} -> {outData}, so no check(s) possible in this case.'.format(inData=inData, outData=self._outEventDict.keys())) + continue + + # Now calculate the expected number of processed events for this input + expectedEvents = neventsInData + if self._skipEvents is not None and self._skipEvents > 0: + expectedEvents -= self._skipEvents + if expectedEvents < 0: + msg.warning('skipEvents was set higher than the input events in {inData}: {skipEvents} > {neventsInData}. This is not an error, but it is not a normal configuration.
Expected events is now 0.'.format(inData=inData, skipEvents=self._skipEvents, neventsInData=neventsInData)) + expectedEvents = 0 + if self._maxEvents is not None: + if expectedEvents < self._maxEvents: + if self._skipEvents is not None: + msg.warning('maxEvents was set higher than inputEvents-skipEvents for {inData}: {maxEvents} > {neventsInData}-{skipEvents}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, skipEvents=self._skipEvents, expectedEvents=expectedEvents)) + else: + msg.warning('maxEvents was set higher than inputEvents for {inData}: {maxEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, expectedEvents=expectedEvents)) + else: + expectedEvents = self._maxEvents + msg.debug('Expected number of processed events for {0} is {1}'.format(inData, expectedEvents)) + + # Loop over output data - first find event count configuration + for outData, neventsOutData in self._outEventDict.iteritems(): + if type(neventsOutData) not in (int, long): + msg.warning('Event count metadata for {outData} was not countable, found "{neventsOutData}". No event checks possible for this output data.'.format(outData=outData, neventsOutData=neventsOutData)) + continue + if outData in self._eventCountConf[inDataKey]: + checkConf = self._eventCountConf[inDataKey][outData] + outDataKey = outData + else: + # Look for glob matches + checkConf = None + for outDataKey, outDataConf in self._eventCountConf[inDataKey].iteritems(): + if fnmatch.fnmatch(outData, outDataKey): + msg.info('Matched output data type {outData} to {outDatakey} by globbing'.format(outData=outData, outDatakey=outDataKey)) + outDataKey = outData + checkConf = outDataConf + break + if not checkConf: + msg.warning('No defined event count match for {inData} -> {outData}, so no check possible in this case.'.format(inData=inData, outData=outData)) + continue + msg.debug('Event count check for {inData} to {outData} is {checkConf}'.format(inData=inData, outData=outData, checkConf=checkConf)) + + # Do the check for this input/output combination + if checkConf == 'match': + # We need an exact match + if neventsOutData == expectedEvents: + msg.info("Event count check for {inData} to {outData} passed: all processed events found ({neventsOutData} output events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData)) + else: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents)) + elif checkConf == 'filter': + if neventsOutData <= expectedEvents and neventsOutData >= 0: + msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents)) + else: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from 0 to {expectedEvents}'.format(inData=inData, outData=outData,
neventsOutData=neventsOutData, expectedEvents=expectedEvents)) + elif checkConf == 'minEff': + if neventsOutData >= int(expectedEvents * self._evAccEff) and neventsOutData <= expectedEvents: + msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents)) + else: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, + minEvents=int(expectedEvents * self._evAccEff), expectedEvents=expectedEvents)) + elif isinstance(checkConf, (float, int, long)): + checkConf = float(checkConf) + if checkConf < 0.0 or checkConf > 1.0: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Event count check for {inData} to {outData} is misconfigured: the efficiency factor of {eff} is not between 0 and 1.'.format(inData=inData, outData=outData, eff=checkConf)) + if neventsOutData >= int(expectedEvents * checkConf) and neventsOutData <= expectedEvents: + msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents)) + else: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, + minEvents=int(expectedEvents * checkConf), expectedEvents=expectedEvents)) + else: + raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), + 'Unrecognised event count configuration for {inData} to {outData}: "{conf}" is not known'.format(inData=inData, outData=outData, conf=checkConf)) + + return True diff --git a/Tools/PyJobTransforms/scripts/Archive_tf.py b/Tools/PyJobTransforms/scripts/Archive_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..2a8688b69083e2532072f80ad35b646b48a6c95b --- /dev/null +++ b/Tools/PyJobTransforms/scripts/Archive_tf.py @@ -0,0 +1,64 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## Archiving transform which will tar or zip input files to an output file +# @version $Id: Archive_tf.py 531319 2012-12-20 09:49:53Z graemes $ + +import os.path +import sys +import time + +import logging + +# This is a hack for Prodsys I. To generate this list use the --dumpargs option to the transform, then paste in here +# N.B. This all has to go on one line!
+ListOfDefaultPositionalKeys=['--compressionType', '--ignoreErrors', '--inputFile', '--omitFileValidation', '--outputFile', '--uploadtoami', '--validation'] + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExe import archiveExecutor +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +import PyJobTransforms.trfArgClasses as trfArgClasses + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +def getTransform(): + trf = transform(executor = archiveExecutor(name = 'Archiver', exe='tar')) + + addMyArgs(trf.parser) + return trf + + +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('Archive_tf', 'Archive transform specific options') + parser.add_argument('--inputDataFile', '--inputFile', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input', type='misc'), + help='Input file(s)', group='Archive_tf') + parser.add_argument('--outputArchFile', '--outputFile', + type=trfArgClasses.argFactory(trfArgClasses.argFile, io='output', type='misc'), + help='Output archive file', + group='Archive_tf') + parser.add_argument('--compressionType', group='Archive_tf', + help='Underlying compression type', choices=['gzip', 'bzip2', 'none'], + default='gzip') + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/Athena_tf.py b/Tools/PyJobTransforms/scripts/Athena_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..db1d17334e71634bd5a461d435ec9814e87a8f82 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/Athena_tf.py @@ -0,0 +1,64 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple athena transform. 
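+# Illustrative usage (arguments as defined in addMyArgs below; invocation assumed): +# Athena_tf.py --testInt 42 --testFloat 3.14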
+# @version $Id: Athena_tf.py 557865 2013-08-12 21:54:36Z graemes $ + +import argparse +import os.path +import sys +import time +import traceback + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExe import athenaExecutor +from PyJobTransforms.trfArgs import addAthenaArguments, addDetectorArguments +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +import PyJobTransforms.trfExceptions as trfExceptions +import PyJobTransforms.trfArgClasses as trfArgClasses + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +## Get the base transform with all arguments added +def getTransform(): + trf = transform(executor = athenaExecutor(name = 'athena')) + addAthenaArguments(trf.parser) + addDetectorArguments(trf.parser) + addMyArgs(trf.parser) + return trf + + +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('Athena_trf', 'Athena_trf specific options') + parser.add_argument('--testInt', type=trfArgClasses.argFactory(trfArgClasses.argInt, runarg=True), + help='Test integer argument', group='Athena_trf') + parser.add_argument('--testFloat', type=trfArgClasses.argFactory(trfArgClasses.argFloat, runarg=True), + help='Test float argument', group='Athena_trf') + parser.add_argument('--maxMsgLevel', type=trfArgClasses.argFactory(trfArgClasses.argString, runarg=True), + help='Highest message level to print in athena (not yet implemented!)', group='Athena_trf') + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/Cat_tf.py b/Tools/PyJobTransforms/scripts/Cat_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..229acbf4a6e81733d79e1d479e993a4f81733796 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/Cat_tf.py @@ -0,0 +1,57 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple 'cat' transform which just cats some input files +# $Id: Cat_tf.py 529035 2012-12-05 15:45:24Z graemes $ + +import argparse +import os +import os.path +import sys +import time +import traceback + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfExe import scriptExecutor +import PyJobTransforms.trfArgs as trfArgs +import PyJobTransforms.trfArgClasses as trfArgClasses +import PyJobTransforms.trfExceptions as trfExceptions +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +def getTransform(): + trf = transform(executor = scriptExecutor(name = 'The Fabulous Cat', exe = 'cat', exeArgs = ['inFile'])) + addMyArgs(trf.parser) + return trf + +## Example of how to add some specific arguments to the transform +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('Cat_trf', 'Cat_trf specific options') + parser.add_argument('--inFile', type=trfArgClasses.argFactory(trfArgClasses.argString), + group='Cat_trf', help='Input file for the cat') + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/EVNTMerge_tf.py b/Tools/PyJobTransforms/scripts/EVNTMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..c2f8e71f6955bd09b1b29e92a19d40af16466268 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/EVNTMerge_tf.py @@ -0,0 +1,67 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## EVNTMerge_tf.py - EVNT merger (adapted from AODMerge_tf.py) +# N.B.
It is unclear whether AODtoDPD is ever run in parallel with AOD merging +# @version $Id: AODMerge_tf.py 530225 2012-12-12 18:16:17Z graemes $ + +import sys +import time + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExe import athenaExecutor +from PyJobTransforms.trfArgs import addAthenaArguments +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +import PyJobTransforms.trfArgClasses as trfArgClasses + +ListOfDefaultPositionalKeys=['--amiConfig', '--amiMetadataTag', '--asetup', '--athena', '--athenaopts', '--checkEventCount', '--command', '--env', '--eventAcceptanceEfficiency', '--execOnly', '--ignoreErrors', '--ignoreFilters', '--ignorePatterns', '--inputEVNTFile', '--maxEvents', '--noimf', '--notcmalloc', '--outputEVNT_MRGFile', '--postExec', '--postInclude', '--preExec', '--preInclude', '--reportName', '--showGraph', '--showPath', '--showSteps', '--skipEvents', '--skipFileValidation', '--skipInputFileValidation', '--skipOutputFileValidation'] + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +def getTransform(): + executorSet = set() + executorSet.add(athenaExecutor(name = 'EVNTMerge', skeletonFile = 'PyJobTransforms/skeleton.EVNTMerge.py',inData = ['EVNT'], outData = ['EVNT_MRG'])) + + trf = transform(executor = executorSet) + + addAthenaArguments(trf.parser) + addMyArgs(trf.parser) + return trf + + +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('EVNTMerge_tf', 'EVNT merge job specific options') + parser.add_argument('--inputEVNTFile', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='input', runarg=True, type='evnt'), + help='Input EVNT file', group='EVNTMerge_tf') + parser.add_argument('--outputEVNT_MRGFile', '--outputEVNTFile', + type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='output', runarg=True, type='evnt'), + help='Output merged EVNT file', group='EVNTMerge_tf') + + + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/Echo_tf.py b/Tools/PyJobTransforms/scripts/Echo_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..3a11eff64dfc20199aa213f53e66c2ceeacbae12 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/Echo_tf.py @@ -0,0 +1,80 @@ +#!
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple 'echo' transform which merely prints its arguments and exits +# $Id: Echo_tf.py 532364 2013-01-09 15:51:55Z graemes $ + +import argparse +import os +import os.path +import sys +import time +import traceback + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfExe import echoExecutor +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +import PyJobTransforms.trfArgs as trfArgs +import PyJobTransforms.trfArgClasses as trfArgClasses +import PyJobTransforms.trfExceptions as trfExceptions + +# Always embed your transform inside a top level exception +# handler. This ensures that uncaught exceptions are handled +# with a modicum of decency and that the transform has the +# chance to produce a job report and a sensible exit code. +# These decorators do this. +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, transform exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +def getTransform(): + trf = transform(executor = echoExecutor()) + addMyArgs(trf.parser) + trfArgs.addTeaArguments(trf.parser) + trfArgs.addAthenaArguments(trf.parser) + return trf + + +## Example of how to add some specific arguments to the transform +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('Echo_trf', 'Echo_trf specific options') + parser.add_argument('--testInt', group='Echo_trf', type=trfArgClasses.argFactory(trfArgClasses.argInt), help='An integer') + parser.add_argument('--testFloat', group='Echo_trf', type=trfArgClasses.argFactory(trfArgClasses.argFloat), help='A float') + parser.add_argument('--testList', group='Echo_trf', + type=trfArgClasses.argFactory(trfArgClasses.argList), help='A string list', nargs='+') + parser.add_argument('--testIntList', group='Echo_trf', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argIntList), help='A int list') + parser.add_argument('--testFile', group='Echo_trf', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input'), help='Test file(s)') + parser.add_argument('--testSubstepList', group='Echo_trf', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argSubstepList), help='A substep list') + parser.add_argument('--testSubstepInt', group='Echo_trf', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argSubstepInt), help='A substep int') + parser.add_argument('--testSubstepBool', group='Echo_trf', nargs='+', + type=trfArgClasses.argFactory(trfArgClasses.argSubstepBool), help='A substep bool') + + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/ExeWrap_tf.py b/Tools/PyJobTransforms/scripts/ExeWrap_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..61ce0fa2cf7b7e91400bfd72e962b85eb3d997f0 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/ExeWrap_tf.py @@ -0,0 +1,66 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple executor wrapper around the transforms' scriptExecutor +# Mainly used to test core infrastructure +# $Id: ExeWrap_tf.py 529035 2012-12-05 15:45:24Z graemes $ + +import argparse +import os +import os.path +import sys +import time +import traceback + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfExe import scriptExecutor +import PyJobTransforms.trfArgs as trfArgs +import PyJobTransforms.trfArgClasses as trfArgClasses +import PyJobTransforms.trfExceptions as trfExceptions +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + + # Need to update what we want to execute after the command line is parsed + # LHS is the slightly convoluted way to get at the single member of a set + # (which, of course, itself has no index) + list(trf._executor)[0].exe = trf.argdict['exe'].value + + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + +def getTransform(): + trf = transform(executor = scriptExecutor(name = 'ExeWrap', exe = None, exeArgs = ['args'])) + addMyArgs(trf.parser) + return trf + +## Example of how to add some specific arguments to the transform +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('ExeWrap_trf', 'ExeWrap_trf specific options') + parser.add_argument('--exe', type=trfArgClasses.argFactory(trfArgClasses.argString), + group='ExeWrap_trf', help='Executable to invoke') + parser.add_argument('--args', type=trfArgClasses.argFactory(trfArgClasses.argList), nargs='+', + group='ExeWrap_trf', help='Additional parameters to the executor') + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/GetTfCommand.py b/Tools/PyJobTransforms/scripts/GetTfCommand.py new file mode 100755 index 0000000000000000000000000000000000000000..d3ab86a2dc8d31f3dc327e2520208f1736ff6977 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/GetTfCommand.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## GetTfCommand.py - prints the job transform command associated with an AMI tag. +# $Id$ + +import sys +import argparse + +from PyJobTransforms.trfLogger import msg, stdLogLevels +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.trfAMI import TagInfo +from PyJobTransforms.trfExceptions import TransformAMIException + +def main(): + + parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description = 'GetTfCommand.py - prints the job transform commands associated with an AMI tag.') + parser.add_argument('--AMI', '--AMIConfig', help='Production tag to be interpreted') + parser.add_argument('--verbose', '--debug', action='store_true', help='set logging level to DEBUG') + parser.add_argument('--argdict', help='name of pickle file in which the result is stored.
Not yet implemented') + + args=vars(parser.parse_args(sys.argv[1:])) + + if 'verbose' in args: + msg.setLevel(stdLogLevels['DEBUG']) + + try: + tag=TagInfo(args['AMI']) + except TransformAMIException, e: + print 'An AMI exception was raised when trying to resolve the tag {0}.'.format(args['AMI']) + print 'Exception message was: {0}'.format(e.errMsg) + print 'Note that you need both suitable credentials to access AMI and access to the panda database (only works from inside CERN) for GetTfCommand.py to work.' + sys.exit(1) + + print tag + + if 'argdict' in args: + tag.dump(args['argdict']) + + +if __name__ == '__main__': + + main() + + + + diff --git a/Tools/PyJobTransforms/scripts/ScanLog.py b/Tools/PyJobTransforms/scripts/ScanLog.py new file mode 100755 index 0000000000000000000000000000000000000000..342d0dc23a4d96f4474a047dd3896e3934ab8bab --- /dev/null +++ b/Tools/PyJobTransforms/scripts/ScanLog.py @@ -0,0 +1,70 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple logfile scanner for post-facto analysis of existing logs +# @version $Id: ScanLog.py 609222 2014-07-29 14:43:50Z graemes $ + +import argparse +import pprint +import sys +import time + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfLogger import stdLogLevels +from PyJobTransforms.trfValidation import athenaLogFileReport, ignorePatterns + +import PyJobTransforms.trfExceptions as trfExceptions +import PyJobTransforms.trfArgClasses as trfArgClasses + + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + # This should change to be a transform so as to be able support transform + # validation option switches + parser = argparse.ArgumentParser(description='Subject a pre-existing athena logfile to standard transform analysis', + argument_default=argparse.SUPPRESS) + parser.add_argument('--logfile', help='Athena logfile to examine', required=True) + parser.add_argument('--ignoreFiles', default = ['atlas_error_mask.db'], + help='Files containing error patterns to be ignored during logfile scans (use "None" to disable the standard "atlas_error_mask.db")', nargs='+') + parser.add_argument('--ignorePatterns', default = [], + help='Regexp error patterns to be ignored during logfile scans (will be applied as a search against the whole logfile line)', nargs='+') + args = vars(parser.parse_args()) + + ignPat = ignorePatterns(files = args['ignoreFiles'], extraSearch = args['ignorePatterns']) + + msg.info('Scanning logfile {0}'.format(args['logfile'])) + logReport = athenaLogFileReport(logfile = args['logfile'], ignoreList = ignPat) + pprint.pprint(logReport.python) + + # Maybe this code should be shared with the athena executor? 
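+ # Illustrative invocation (logfile name assumed): + # ScanLog.py --logfile log.RAWtoESD --ignorePatterns 'known harmless message' + # A non-zero exit code is produced below when the worst error reaches ERROR level.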
+ try: + worstError = logReport.worstError() + if worstError['nLevel'] >= stdLogLevels['ERROR']: + errmsg = 'Fatal error in athena logfile (level %s)' % worstError['level'] + msg.error(errmsg) + if worstError['firstError']: + raise trfExceptions.TransformLogfileErrorException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), + 'Fatal error in athena logfile: "%s" (occurred %d times, first occurrence line %d)' % + (worstError['firstError']['message'], worstError['firstError']['count'], worstError['firstError']['firstLine'])) + else: + raise trfExceptions.TransformLogfileErrorException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), + 'Fatal error in athena logfile (specific message unknown).') + except trfExceptions.TransformException, e: + msg.error('%s' % e.errMsg) + sys.exit(e.errCode) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/Sleep_tf.py b/Tools/PyJobTransforms/scripts/Sleep_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..f0d8d15cf673caacd8c12fb61fbea7a33dc624a8 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/Sleep_tf.py @@ -0,0 +1,60 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A simple 'sleep' transform which just sleeps +# Useful for testing behaviour with signals and stuff +# $Id: Sleep_tf.py 534178 2013-01-21 19:04:08Z graemes $ + +import argparse +import os +import os.path +import sys +import time +import traceback + +import logging + +# Setup core logging here +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfExe import scriptExecutor +import PyJobTransforms.trfArgs as trfArgs +import PyJobTransforms.trfArgClasses as trfArgClasses +import PyJobTransforms.trfExceptions as trfExceptions +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + msg.info('This is %s' % sys.argv[0]) + + + trf = getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + + msg.info("%s stopped at %s, trf exit code %d" % (sys.argv[0], time.asctime(), trf.exitCode)) + sys.exit(trf.exitCode) + + +def getTransform(): + trf = transform(executor = scriptExecutor(name = 'The Sandman', exe = 'sleep', exeArgs = ['dust'])) + addMyArgs(trf.parser) + return trf + +## Example of how to add some specific arguments to the transform +def addMyArgs(parser): + # Use arggroup to get these arguments in their own sub-section (of --help) + parser.defineArgGroup('Sleep_trf', 'Sleep_trf specific options') + parser.add_argument('--dust', type=trfArgClasses.argFactory(trfArgClasses.argInt), group='Sleep_trf', + help='How much dust to throw (=sleep time in seconds)', default=trfArgClasses.argInt(10)) + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/TransformTestRunner.py b/Tools/PyJobTransforms/scripts/TransformTestRunner.py new file mode 100755 index 0000000000000000000000000000000000000000..43ea85d24c386ee1ffcab5a9b40a055ef8ec8893 --- /dev/null +++ b/Tools/PyJobTransforms/scripts/TransformTestRunner.py @@ -0,0 +1,35 @@ +#!
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +## @note Find a named test in the DATAPATH and execute it with all arguments +# @version $Id: TransformTestRunner.py 551981 2013-06-21 10:16:16Z graemes $ + +import os +import os.path +import sys + +from PyJobTransforms.trfUtils import findFile +from PyJobTransforms.trfLogger import msg + +def main(): + if len(sys.argv) < 2: + msg.error('No test argument was given') + sys.exit(1) + + if 'DATAPATH' not in os.environ: + msg.error('There is no DATAPATH to search along - is the release setup?') + sys.exit(1) + + testScript = os.path.join('JobTransforms/test', sys.argv[1]) + + pathToTestScript = findFile(os.environ['DATAPATH'], testScript) + if pathToTestScript is None: + msg.error('Test {0} was not found along DATAPATH'.format(testScript)) + sys.exit(1) + + msg.info('Found test {0} here: {1}'.format(sys.argv[1], pathToTestScript)) + os.execv(pathToTestScript, sys.argv[1:]) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py b/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..e4e383af6289243c987e5bf1075083960a01533e --- /dev/null +++ b/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py @@ -0,0 +1,69 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## A transform just getting some default file arguments (only to test file validation ) + +import argparse +import os +import os.path +import sys +import time +import traceback + +import logging + +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +from PyJobTransforms.transform import transform +from PyJobTransforms.trfExitCodes import trfExit +from PyJobTransforms.trfExe import transformExecutor +import PyJobTransforms.trfArgs as trfArgs +import PyJobTransforms.trfArgClasses as trfArgClasses +from PyJobTransforms.trfDecorators import stdTrfExceptionHandler, sigUsrStackTrace + + +@stdTrfExceptionHandler +@sigUsrStackTrace +def main(): + + trf=getTransform() + trf.parseCmdLineArgs(sys.argv[1:]) + trf.execute() + trf.generateReport() + sys.exit(trf.exitCode) + +def getTransform(): + trf=transform(executor = transformExecutor()) + + # Mostly reco types... + addArgs(trf.parser) + trfArgs.addFileValidationArguments(trf.parser) + trfArgs.addParallelJobProcessorArguments(trf.parser) + + # Add all known D3PD types + trfArgs.addD3PDArguments(trf.parser, transform = trf, multipleOK=True) + + return trf + +def addArgs(parser): + # N.B. 
Although the trf does not generate these files, badge them as 'output' to run full validation + parser.add_argument('--outputBSFile', type=trfArgClasses.argFactory(trfArgClasses.argBSFile, io='output', type='bs', multipleOK=True), nargs='+') + parser.add_argument('--outputESDFile', type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='output', type='esd', multipleOK=True), nargs='+') + parser.add_argument('--outputAODFile', type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='output', type='aod', multipleOK=True), nargs='+') + parser.add_argument('--outputRDOFile', type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='output', type='rdo', multipleOK=True), nargs='+') + parser.add_argument('--outputTAGFile', type=trfArgClasses.argFactory(trfArgClasses.argTAGFile, io='output', type='tag', multipleOK=True), nargs='+') + parser.add_argument('--outputEVNTFile', type=trfArgClasses.argFactory(trfArgClasses.argPOOLFile, io='output', type='evnt', multipleOK=True), nargs='+') + parser.add_argument('--outputHISTFile', type=trfArgClasses.argFactory(trfArgClasses.argHISTFile, io='output', type='hist', multipleOK=True), nargs='+') + parser.add_argument('--outputTXT_FTKIPFile', type=trfArgClasses.argFactory(trfArgClasses.argFTKIPFile, io='output', multipleOK=True), nargs='+') + parser.add_argument('--outputNTUP_FTKSIMFile', type=trfArgClasses.argFactory(trfArgClasses.argNTUPFile, io='output', type='ntup_ftksim', treeNames = ['ftkdata'], multipleOK=True), nargs='+') + parser.add_argument('--outputEvtFile', type=trfArgClasses.argFactory(trfArgClasses.argHepEvtAsciiFile, io='output', type='evt', multipleOK=True), nargs='+') + + + # This is a dummy argument which makes the graph tracer code happy + parser.add_argument('--inputFile', type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input', type='dummy'), default=trfArgClasses.argFile(type='dummy')) + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/scripts/makeTrfSignatures.py b/Tools/PyJobTransforms/scripts/makeTrfSignatures.py new file mode 100755 index 0000000000000000000000000000000000000000..cff7dd276fa2ba782bb3a3ab84718b60e3ac984d --- /dev/null +++ b/Tools/PyJobTransforms/scripts/makeTrfSignatures.py @@ -0,0 +1,72 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# $Id: makeTrfSignatures.py 528688 2012-12-03 15:18:29Z graemes $ +# + +__doc__ = 'Dump job transform arguments into a file as JSON' + +import argparse +import glob +import os +import os.path +import json +import sys + +import logging +logging.basicConfig(format='%(filename)s:%(levelname)s:%(message)s', level=logging.INFO) + +def main(): + parser = argparse.ArgumentParser(description = __doc__, ) + parser.add_argument('--output', help='JSON output file', required = True) + parser.add_argument('--transforms', help='Comma separated list of transforms to process', default='all') + cliargs = vars(parser.parse_args()) + + # Make sure we can import from where the trfs actually live + # (N.B.
diff --git a/Tools/PyJobTransforms/scripts/makeTrfSignatures.py b/Tools/PyJobTransforms/scripts/makeTrfSignatures.py new file mode 100755 index 0000000000000000000000000000000000000000..cff7dd276fa2ba782bb3a3ab84718b60e3ac984d --- /dev/null +++ b/Tools/PyJobTransforms/scripts/makeTrfSignatures.py @@ -0,0 +1,72 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# $Id: makeTrfSignatures.py 528688 2012-12-03 15:18:29Z graemes $ +# + +__doc__ = 'Dump job transform arguments into a JSON file' + +import argparse +import glob +import os +import os.path +import json +import sys + +import logging +logging.basicConfig(format='%(filename)s:%(levelname)s:%(message)s', level=logging.INFO) + +def main(): + parser = argparse.ArgumentParser(description = __doc__, ) + parser.add_argument('--output', help='JSON output file', required = True) + parser.add_argument('--transforms', help='Comma separated list of transforms to process', default='all') + cliargs = vars(parser.parse_args()) + + # Make sure we can import from where the trfs actually live + # (N.B. This script is invoked from the cmt directory at install time) + sys.path.insert(1, os.path.join(os.getcwd(), '../scripts')) + + myTrfSigs = {} + myTrfSigDesc = {} + + if cliargs['transforms'] == 'all': + # List comprehension strips off the path and removes the .py suffix + # Look for all _tf.py (new style names) + myTrfs = [ os.path.basename(t)[:-3] for t in glob.glob('../scripts/*_tf.py') ] + else: + myTrfs = cliargs['transforms'].split(',') + logging.info('Will process this list of transforms: {0}'.format(' '.join(myTrfs))) + processedTrfs = [] + + for trf in myTrfs: + logging.info('Processing argument signatures for {0}'.format(trf)) + # Use __import__ to allow us to import from the trf list + try: + trfModule = __import__('{0}'.format(trf), globals(), locals(), ['getTransform'], -1) + except ImportError: + logging.warning('Failed to import transform {0} - ignored'.format(trf)) + continue + if 'getTransform' not in dir(trfModule): + logging.warning('Transform {0} has no getTransform() functionality - ignored for signatures'.format(trf)) + continue + transform = trfModule.getTransform() + args = transform.parser.allArgs + + logging.debug('Trf %s: %s' % (trf, args)) + processedTrfs.append(trf) + myTrfSigs[trf] = args + myTrfSigDesc[trf] = transform.parser.getProdsysDesc() + try: + logging.info('Writing JSON signatures to {0}'.format(cliargs['output'])) + sigFile = open(cliargs['output'], 'wb') + json.dump(myTrfSigDesc, sigFile, indent=4) + except (OSError, IOError) as e: + logging.error('Failed to dump JSON signatures to %s: %s' % (cliargs['output'], e)) + sys.exit(1) + + logging.info('Successfully generated signature file "%s" for transforms %s' % (cliargs['output'], processedTrfs)) + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/share/UseFrontier.py b/Tools/PyJobTransforms/share/UseFrontier.py new file mode 100644 index 0000000000000000000000000000000000000000..a9179877102f244de5f4530de69799b1c90dd786 --- /dev/null +++ b/Tools/PyJobTransforms/share/UseFrontier.py @@ -0,0 +1,16 @@ +## @brief Switch database to using FRONTIER, but with a fallback +# to DBRelease if FRONTIER_SERVER is undefined (e.g., on HPC) + +# Move from RecJobTransforms to PyJobTransforms to enable use +# in simulation ATN and KV jobs +# $Id: UseFrontier.py 605683 2014-07-09 17:22:17Z graemes $ + +import os +from AthenaCommon.AppMgr import ServiceMgr + +if(os.environ.get('FRONTIER_SERVER')): + print 'UseFrontier.py: Enabling FRONTIER DB access' + from DBReplicaSvc.DBReplicaSvcConf import DBReplicaSvc + ServiceMgr+=DBReplicaSvc(COOLSQLiteVetoPattern="DBRelease") +else: + print 'UseFrontier.py: Using default DB access' \ No newline at end of file
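This fragment is meant to be pulled into a job at run time rather than imported as a module. A typical way to attach it to a transform job would be via a preInclude argument; the command below is purely illustrative (file names are placeholders, and the exact include path depends on where the share area is installed):

# Hypothetical invocation sketch - names and paths are placeholders
cmd = ['Reco_tf.py',
       '--inputBSFile', 'input.data',
       '--outputESDFile', 'output.ESD.pool.root',
       '--preInclude', 'PyJobTransforms/UseFrontier.py']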
diff --git a/Tools/PyJobTransforms/share/atlas_error_mask.db b/Tools/PyJobTransforms/share/atlas_error_mask.db new file mode 100755 index 0000000000000000000000000000000000000000..a5232afdf01183e6487d7ba8c67e4eea10a16766 --- /dev/null +++ b/Tools/PyJobTransforms/share/atlas_error_mask.db @@ -0,0 +1,74 @@ +# $Id: atlas_error_mask.db 576626 2013-12-21 23:29:31Z graemes $ + +# Error mask file for new transforms +# Each line contains 3 fields, separated by commas: +# who_regexp, level, error_message_regexp +# +# When parsed, all whitespace around these expressions is stripped +# +# who_regexp is matched against the athena service that produced the message; if the service is empty then any service matches +# level is compared for equality against the message level; if empty any level matches +# error_message_regexp is matched against the message string (so add a ".*" prefix if you want a search instead) +# +# Note that the goal is to reduce this file to ZERO entries! + +## Errors to ignore for ALL releases +## ================================= +ToolSvc.CscSplitClusterFitter, ERROR, Peak-to-Val dist is [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? Val-to-Peak dist is [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? Shouldnot be negative value :[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? +AlgErrorAuditor , ERROR, Illegal Return Code: Algorithm CscThresholdClusterBuilder reported an ERROR, but returned a StatusCode "SUCCESS" +AlgErrorAuditor , ERROR, Illegal Return Code: Algorithm InDetSCTRawDataProvider reported an ERROR, but returned a StatusCode "SUCCESS" +(?:Py:)?Athena , ERROR, inconsistent case used in property name ".*?" of ApplicationMgr +(?:Py:)?Athena , ERROR, Algorithm ".*?": not in TopAlg or other known list, no properties set +(?:Py:)?Athena , ERROR, Algorithm ".*?": type missing, no properties set +(?:Py:)?Athena , ERROR, attempt to add .* to non-existent property .*? +(?:Py:)?Configurable, ERROR, .* undeclared or uses a backdoor +(?:Py:)?Configurable, ERROR, children\(\) is deprecated +(?:Py:)?Configurable, ERROR, getChildren\(\) returns a copy +(?:Py:)?Configurable, ERROR, jobOptName\(\) is deprecated + +# Reco +(?:Py:)?Configurable, ERROR, attempt to add a duplicate \(CellCalibrator.CellCalibrator.H1WeightCone7H1Tower\) +(?:Py:)?ResourceLimits,ERROR, failed to set max resource limits +AlgErrorAuditor, ERROR, Illegal Return Code: Algorithm StreamESD reported an ERROR, but returned a StatusCode "SUCCESS" + +# Trigger BStoRDO +AthenaRefIOHandler, ERROR, Failed to set ElementLink +ElementLink, ERROR, toPersistent: the internal state of link +StoreGateSvc, ERROR, record: object not added to store +StoreGateSvc, ERROR, setupProxy:: error setting up proxy +AlgErrorAuditor, ERROR, Illegal Return Code: Algorithm MooHLTAlgo +AlgErrorAuditor, ERROR, Illegal Return Code: Algorithm TrigSteer_EF +AlgErrorAuditor, ERROR, Illegal Return Code: Algorithm muFast_(?:Muon|900GeV) + +# Trigger reco_ESD +THistSvc, ERROR, already registered an object with identifier "/EXPERT/ +RpcRawDataNtuple , ERROR, .* +CBNT_L1CaloROD\S+ , ERROR, .* +CBNTAA_Tile\S+ , ERROR, .* +TileDigitsMaker , ERROR, .* +MdtDigitToMdtRDO , ERROR, .* +HelloWorld , ERROR, .* +HelloWorld , FATAL, .* +PythiaB , ERROR, ERROR in PYTHIA PARAMETERS +ToolSvc , ERROR, Tool .* not found and creation not requested +ToolSvc , ERROR, Unable to finalize the following tools +ToolSvc , ERROR, Factory for Tool .* not found +CBNT_Audit , ERROR, Memory leak!.* +ToolSvc.InDetSCTRodDecoder , ERROR, Unknown offlineId for OnlineId* +THistSvc.sysFinali, FATAL, Standard std::exception is caught +,,.*Message limit reached for .* +,,\s+ERROR IN C-S .*=.* +,,.*ERROR\s+\|.* +,,^\s*FATAL ERROR\s*$ +,ERROR, \(poolDb\): +,ERROR, \(pool\): +,ERROR, - G4Navigator::ComputeStep\(\) +,,.*ERROR OCCURED DURING A SECONDARY SCATTER AND WAS +THistSvc , ERROR, already registered an object with identifier .* +,ERROR, MuonDetectorManager::getCscReadoutElement stNameindex out of range .* +muFast_\S+ , ERROR, CSM for Subsystem \d+, MrodId \d+, LinkId \d+ not found +TRTDetectorManager , FATAL, Unable to apply Inner Detector alignments +TRTDetectorManager , ERROR, AlignableTransformContainer for key \/TRT\/Align is empty +,ERROR, in Single_Process::CalculateTotalXSec +,,.*ERROR WITH DELM.* +AlgErrorAuditor,ERROR, Illegal Return Code: Algorithm
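For illustration, a minimal parser for mask lines in the format described above might look like the following sketch (hypothetical; the transforms' own log scanning machinery is the real consumer of this file):

import re

def parseMaskLine(line):
    # Caller should skip comment and blank lines before calling this;
    # maxsplit=2 keeps commas inside the message regexp intact
    who, level, message = [ field.strip() for field in line.split(',', 2) ]
    # Empty who or level fields match any service / any level
    return (re.compile(who) if who else None,
            level or None,
            re.compile(message) if message else None)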
diff --git a/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py b/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py new file mode 100644 index 0000000000000000000000000000000000000000..6237405ef2abd427f7c21401c3fb3d61abaa173e --- /dev/null +++ b/Tools/PyJobTransforms/share/skeleton.EVNTMerge.py @@ -0,0 +1,23 @@ +############################# +## basic jobO configuration +include("PATJobTransforms/CommonSkeletonJobOptions.py") +## load pool support +import AthenaPoolCnvSvc.ReadAthenaPool +import AthenaPoolCnvSvc.WriteAthenaPool + +## input +ServiceMgr.EventSelector.InputCollections = runArgs.inputEVNTFile + +## output stream +from AthenaPoolCnvSvc.WriteAthenaPool import AthenaPoolOutputStream + +outStream = AthenaPoolOutputStream("StreamEVGEN", runArgs.outputEVNT_MRGFile) + +## copy everything from the input file +## must force reading of all input objects +outStream.TakeItemsFromInput = True +outStream.ForceRead = True +########## EOF ############### + + +
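The runArgs object consumed by this skeleton is not defined here: the transform machinery generates a runargs job options fragment at run time and includes it ahead of the skeleton. Conceptually the generated fragment looks something like this sketch (values are purely illustrative):

# Sketch of an auto-generated runargs fragment (illustrative values only)
from PyJobTransforms.trfJobOptions import RunArguments
runArgs = RunArguments()
runArgs.inputEVNTFile = ['input1.EVNT.pool.root', 'input2.EVNT.pool.root']
runArgs.outputEVNT_MRGFile = 'merged.EVNT.pool.root'
runArgs.maxEvents = -1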
diff --git a/Tools/PyJobTransforms/share/skeleton.dummy.py b/Tools/PyJobTransforms/share/skeleton.dummy.py new file mode 100644 index 0000000000000000000000000000000000000000..261907032c83553e63f0c58caa2a806b29e4426f --- /dev/null +++ b/Tools/PyJobTransforms/share/skeleton.dummy.py @@ -0,0 +1,112 @@ +############################################################### +# +# Job options file +# +#============================================================== + +#-------------------------------------------------------------- +# ATLAS default Application Configuration options +#-------------------------------------------------------------- + +# No event selector needed for basic 'Hello World!' Algorithm + +#-------------------------------------------------------------- +# Private Application Configuration options +#-------------------------------------------------------------- + +# Full job is a list of algorithms +from AthenaCommon.AlgSequence import AlgSequence +job = AlgSequence() + +# Add top algorithms to be run +from AthExHelloWorld.AthExHelloWorldConf import HelloAlg +job += HelloAlg( "HelloWorld" ) # 1 alg, named "HelloWorld" + +#-------------------------------------------------------------- +# Set output level threshold (DEBUG, INFO, WARNING, ERROR, FATAL) +#-------------------------------------------------------------- + +# Output level for HelloAlg only (note name: instance, not type) +job.HelloWorld.OutputLevel = INFO + +# You can set the global output level on the message svc (not +# recommended) or by using the -l athena CLI parameter + +#-------------------------------------------------------------- +# Event related parameters +#-------------------------------------------------------------- + +# Number of events to be processed (the default, -1, means run until +# the end of input; since this job has no input, a limit must be set +# explicitly - here we choose 10) +theApp.EvtMax = 10 + +#-------------------------------------------------------------- +# Algorithms Private Options (all optional) +#-------------------------------------------------------------- + +# For convenience, get a reference to the HelloAlg Algorithm +# named "HelloWorld" in the job +HelloWorld = job.HelloWorld + +# Set an int property +HelloWorld.MyInt = 42 + +# Set a boolean property (False, True, 0, 1) +HelloWorld.MyBool = True + +# Set a double property +HelloWorld.MyDouble = 3.14159 + +# Set a vector of strings property ... +HelloWorld.MyStringVec = [ "Welcome", "to", "Athena", "Framework", "Tutorial" ] + +# ... and add one more: +HelloWorld.MyStringVec += [ "!" ] + +# Set a map of strings to strings property ... +HelloWorld.MyDict = { 'Bonjour' : 'Guten Tag', + 'Good Morning' : 'Bonjour' , 'one' : 'uno' } + +# ... and add one more: +HelloWorld.MyDict[ "Goeiedag" ] = "Ni Hao" + +# Set a table (a vector of pairs of doubles) ... +HelloWorld.MyTable = [ ( 1 , 1 ) , ( 2 , 4 ) , ( 3 , 9 ) ] + +# ... and one more: +HelloWorld.MyTable += [ ( 4, 16 ) ] + +# Set a matrix (a vector of vectors) ... +HelloWorld.MyMatrix = [ [ 1, 2, 3 ], + [ 4, 5, 6 ] ] + +# ... and some more: +HelloWorld.MyMatrix += [ [ 7, 8, 9 ] ] + +#-------------------------------------------------------------- +# Algorithms Tool Usage Private Options (advanced and optional) +#-------------------------------------------------------------- + +# Import configurable for using our HelloTool +from AthExHelloWorld.AthExHelloWorldConf import HelloTool + +# Setup a public tool so that it can be used (again, note name) +ToolSvc += HelloTool( "PublicHello" ) +ToolSvc.PublicHello.MyMessage = "A Public Message!" + +# Tell "HelloWorld" to use this tool ("MyPublicHelloTool" is a +# ToolHandle property of HelloAlg) +HelloWorld.MyPublicHelloTool = ToolSvc.PublicHello + +# Hand "HelloWorld" a private HelloTool ("MyPrivateHelloTool" is +# a ToolHandle property of HelloAlg) +HelloWorld.MyPrivateHelloTool = HelloTool( "HelloTool" ) +HelloWorld.MyPrivateHelloTool.MyMessage = "A Private Message!" + +#============================================================== +# +# End of job options file +# +############################################################### + diff --git a/Tools/PyJobTransforms/test/PyJobTransforms_TestConfiguration.xml b/Tools/PyJobTransforms/test/PyJobTransforms_TestConfiguration.xml new file mode 100644 index 0000000000000000000000000000000000000000..ae432f40f494d294bf492923c2afff00055229fe --- /dev/null +++ b/Tools/PyJobTransforms/test/PyJobTransforms_TestConfiguration.xml @@ -0,0 +1,438 @@ +<?xml version="1.0"?> +<!DOCTYPE unifiedTestConfiguration SYSTEM "http://www.hep.ucl.ac.uk/atlas/AtlasTesting/DTD/unifiedTestConfiguration.dtd"> + +<unifiedTestConfiguration> + <atn> + + <TEST name="transform" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_transform.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfArgClasses" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfArgClasses.py + </options_atn> + <timelimit>10</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfArgs" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfArgs.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfDecorators" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfDecorators.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> +
</expectations> + </TEST> + + <TEST name="trfExceptions" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfExceptions.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfExe" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfExe.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + <TEST name="trfExitCodes" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfExitCodes.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfFileUtils" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfFileUtils.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfGraph" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfGraph.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfJobOptions" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfJobOptions.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfLogger" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfLogger.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfReports" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfReports.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfSignal" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfSignal.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfUtils" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + 
${ATN_PACKAGE}/test/test_trfUtils.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfUtilsParallelJobProcessor" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfUtilsParallelJobProcessor.py + </options_atn> + <timelimit>2</timelimit> + <author>Will Breaden Madden</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfValidateRootFile" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfValidateRootFile.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + <TEST name="trfValidation" type="script" suite="PyJobTransforms-CoreTests"> + <options_atn> + ${ATN_PACKAGE}/test/test_trfValidation.py + </options_atn> + <timelimit>2</timelimit> + <author>Graeme Stewart</author> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + <expectations> + <errorMessage>FAILED </errorMessage> + <returnValue>0</returnValue> + </expectations> + </TEST> + + </atn> + + <rtt xmlns="http://www.hep.ucl.ac.uk/atlas/AtlasTesting/rtt"> + + <rttContactPerson>Graeme Stewart</rttContactPerson> + <mailto>atlas-comp-transforms-dev@cern.ch</mailto> + + <jobList> + + <classification> + <displayClass>OfflineValidation</displayClass> + <displayProcess>Transforms</displayProcess> + <displayComponent>Core</displayComponent> + </classification> + + <jobTransform userJobId="ArgClassesTest"> + <doc>Unittest suite for trfArgClasses</doc> + <jobTransformJobName>trfArgClasses</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_trfArgClasses.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="AMITagTest"> + <doc>Unittest suite for AMI tag resolution</doc> + <jobTransformJobName>trfAMI</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_trfAMI.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="DBReleaseTest"> + <doc>Unittest suite for DBRelease handling</doc> + <jobTransformJobName>DBRelease</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_trfUtilsDBRelease.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="ParallelJobProcessorDataTest"> + <doc>Unittest suite for ParallelJobProcessorData</doc> + <jobTransformJobName>trfUtilsParallelJobProcessorData</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_trfUtilsParallelJobProcessorData.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="EchoTest"> + <doc>Test echo transform runs correctly</doc> + <jobTransformJobName>Echo_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_Echo_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> +
+ <jobTransform userJobId="SleepTest"> + <doc>Test sleep transform runs correctly</doc> + <jobTransformJobName>Sleep_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_Sleep_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="LogScanTest"> + <doc>Test logfile scanning options</doc> + <jobTransformJobName>trfArgClasses</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_LogFileScanner.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="RAWMergeTest"> + <doc>Test RAW merging transform runs correctly</doc> + <jobTransformJobName>RAWMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_RAWMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="TAGMergeTest"> + <doc>Test TAG merging transform runs correctly</doc> + <jobTransformJobName>TAGMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_TAGMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>short</queue> + </jobTransform> + + <jobTransform userJobId="RecoTestBasic"> + <doc>Test simple Reco_tf transform runs correctly</doc> + <jobTransformJobName>Reco_tf_basic</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_Reco_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="RecoTestTier0"> + <doc>Test simple Reco_tf transform runs correctly in Tier0 mode</doc> + <jobTransformJobName>Reco_tf_Tier0</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_Reco_Tier0_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="RecoTestBasicEOS"> + <doc>Test simple Reco_tf transform runs correctly for bytestream on EOS</doc> + <jobTransformJobName>Reco_tf_basic_EOS</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_Reco_EOS_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="EVNTMergeTest"> + <doc>Test EVNT merging runs correctly</doc> + <jobTransformJobName>EVNTMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_EVNTMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="ESDMergeTest"> + <doc>Test ESD merging runs correctly</doc> + <jobTransformJobName>ESDMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_ESDMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="AODMergeTest"> + <doc>Test AOD merging with TAG creation runs correctly</doc> + <jobTransformJobName>AODMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_AODMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="NTUPMergeTest"> + <doc>Test NTUP merging runs correctly</doc> + <jobTransformJobName>NTUPMerge_tf</jobTransformJobName> + <jobTransformCmd> + 
TransformTestRunner.py test_NTUPMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + + <jobTransform userJobId="HISTMergeTest"> + <doc>Test DQ HIST merging runs correctly</doc> + <jobTransformJobName>HISTMerge_tf</jobTransformJobName> + <jobTransformCmd> + TransformTestRunner.py test_HISTMerge_tf.py + </jobTransformCmd> + <group>PyJobTransforms:TransformCoreTests</group> + <queue>medium</queue> + </jobTransform> + </jobList> + + <jobGroups> + <jobGroup name="TransformCoreTests" parent="RTT:Athena"> + <!-- Keep substep logs and the JSON job report(s) --> + <keepFilePattern>jobReport*.json</keepFilePattern> + <keepFilePattern>log.*</keepFilePattern> + <!-- Suppress the test which checks for *.pool.root files --> + <testToRemove> + <jobGroupName>RTT:Top</jobGroupName> + <testidentifier>CheckFileRunner0</testidentifier> + </testToRemove> + <!-- Suppress the test which checks for WARN lines in the logs --> + <testToRemove> + <jobGroupName>RTT:Athena</jobGroupName> + <testidentifier>Athena_FileGrepper1</testidentifier> + </testToRemove> + <!-- Reco often produces spurious "ERROR" messages that the RTT test is too sensitive to --> + <testToRemove> + <jobGroupName>RTT:Athena</jobGroupName> + <testidentifier>Athena_FileGrepper</testidentifier> + </testToRemove> + </jobGroup> + </jobGroups> + + </rtt> + +</unifiedTestConfiguration>
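The test modules that follow all repeat the same idiom for running a transform and echoing its output as it arrives. Factored out, the pattern is just this (a hypothetical helper for illustration, not part of the package):

import subprocess
import sys

def runTransform(cmd):
    # Run the transform, streaming its combined stdout/stderr live
    p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE,
                         stderr = subprocess.STDOUT, bufsize = 1)
    while p.poll() is None:
        sys.stdout.write(p.stdout.readline())
    # Hoover up remaining buffered output lines
    for line in p.stdout:
        sys.stdout.write(line)
    return p.returncode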
diff --git a/Tools/PyJobTransforms/test/test_AODMerge_tf.py b/Tools/PyJobTransforms/test/test_AODMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..1db6804e052d729ff4fe7514b2714dc707378208 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_AODMerge_tf.py @@ -0,0 +1,74 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# $Id: test_AODMerge_tf.py 604373 2014-07-01 09:32:30Z graemes $ +# Run an AODMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/valid2.117050.PowhegPythia_P2011C_ttbar.digit.AOD.e2657_s1933_s1964_r5539_tid01483607_00/AOD.*' + +class AODMergetest(unittest.TestCase): + + def test_runAODMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 5) + cmd = ['AODMerge_tf.py', '--inputAODFile'] + cmd.extend(inputs) + cmd.extend(['--outputAOD_MRGFile', 'merged.AOD.pool.root']) + cmd.extend(['--outputTAGFile', 'TAG.pool.root']) + cmd.extend(['--reportName', 'jobReportFast']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReportFast.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 2500) + + def test_runAODMergeSlow(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 5) + cmd = ['AODMerge_tf.py', '--inputAODFile'] + cmd.extend(inputs) + cmd.extend(['--outputAOD_MRGFile', 'slowmerged.AOD.pool.root']) + cmd.extend(['--outputTAGFile', 'slowTAG.pool.root']) + cmd.extend(['--fastPoolMerge', 'False']) + cmd.extend(['--reportName', 'jobReportSlow']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReportSlow.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 2500) + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_AtlasG4_SimTTBar_tf.py b/Tools/PyJobTransforms/test/test_AtlasG4_SimTTBar_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..2b68626a1432d1ae1f14266a4fed14bd02796366 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_AtlasG4_SimTTBar_tf.py @@ -0,0 +1,60 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a Sim job and test output metadata +# $Id: test_AtlasG4_SimTTBar_tf.py 588881 2014-03-21 16:43:39Z graemes $ +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict + +sourceFile = '/afs/cern.ch/atlas/offline/ProdData/16.6.X/16.6.7.Y/ttbar_muplusjets-pythia6-7000.evgen.pool.root' + +class SimTTBar_tftest(unittest.TestCase): + + def test_runAtlasG4_tf(self): + cmd = ['AtlasG4_tf.py', '--inputEvgenFile', sourceFile] + cmd.extend(['--outputHITSFile', 'test.HITS.pool.root']) + cmd.extend(['--geometryVersion', 'ATLAS-GEO-20-00-01']) + cmd.extend(['--conditionsTag', 'OFLCOND-MC12-SIM-00']) + cmd.extend(['--randomSeed', '10']) + cmd.extend(['--skipEvents', '0']) + cmd.extend(['--maxEvents', '2']) # 2 events - this is a slow job + ## Event counting currently broken for multi-step transforms + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + dataDict = pyJobReportToFileDict(md) + # Change in SimuJobTransforms, but be compatible with type = hits and HITS + dataKey = None + if 'hits' in dataDict.keys(): + dataKey = 'hits' + elif 'HITS' in dataDict.keys(): + dataKey = 'HITS' + self.assertNotEqual(dataKey, None) + self.assertEqual(dataDict[dataKey]['subFiles'][0]['nentries'], 2) + self.assertEqual(dataDict[dataKey]['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict[dataKey]['subFiles'][0]['conditions_tag'], 'OFLCOND-MC12-SIM-00') + +if __name__ == '__main__': + unittest.main()
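pyJobReportToFileDict, used above and in several tests below, reindexes the job report's output files by data type, so that assertions can say dataDict['ESD'] instead of walking the output list. A sketch of the idea, assuming the jobReport layout visible in these tests (the real helper lives in PyJobTransforms.trfReports):

def pyJobReportToFileDictSketch(jobReport):
    dataDict = {}
    for fileReport in jobReport['files']['output']:
        # Key each output file block by its data type, e.g. 'ESD', 'AOD', 'HIST'
        dataDict[fileReport['type']] = fileReport
    return dataDict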
diff --git a/Tools/PyJobTransforms/test/test_ESDMerge_tf.py b/Tools/PyJobTransforms/test/test_ESDMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..6e726ae37f95ccb4d5ebad7a282dcc63bf8e3b8e --- /dev/null +++ b/Tools/PyJobTransforms/test/test_ESDMerge_tf.py @@ -0,0 +1,72 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# $Id: test_ESDMerge_tf.py 573315 2013-12-02 15:45:55Z graemes $ +# Run an ESDMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/mc12_8TeV.147033.AlpgenPythia_Auto_P2011C_WmunuNp0.recon.DESD_SGLMU.e1880_s1581_s1586_r4767_tid01302235_00/DESD*' + +class ESDMergetest(unittest.TestCase): + + def test_runESDMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 6) + cmd = ['ESDMerge_tf.py', '--inputESDFile'] + cmd.extend(inputs) + cmd.extend(['--outputESD_MRGFile', 'merged.ESD.pool.root']) + cmd.extend(['--reportName', 'jobReportFast']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReportFast.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 66) + + def test_runESDMergeSlow(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 6) + cmd = ['ESDMerge_tf.py', '--inputESDFile'] + cmd.extend(inputs) + cmd.extend(['--outputESD_MRGFile', 'slowmerged.ESD.pool.root']) + cmd.extend(['--fastPoolMerge', 'False']) + cmd.extend(['--reportName', 'jobReportSlow']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReportSlow.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 66) + + +if __name__ == '__main__': + unittest.main()
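Note that the slow-merge variant in the test above differs from the fast one by a single switch: with --fastPoolMerge False the transform falls back from the fast POOL-level merge to a full Athena event-loop copy, so both code paths get exercised against identical assertions. The toggle, exactly as the tests pass it:

# The only difference between the fast and slow merge test jobs
cmd.extend(['--fastPoolMerge', 'False'])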
diff --git a/Tools/PyJobTransforms/test/test_EVNTMerge_tf.py b/Tools/PyJobTransforms/test/test_EVNTMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..dc150732c59bca52f04087cb352d1b68e9eea782 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_EVNTMerge_tf.py @@ -0,0 +1,47 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# $Id: test_EVNTMerge_tf.py 573315 2013-12-02 15:45:55Z graemes $ +# Run an EVNTMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/mc11_7TeV.117838.TTbar_MT1675_PowHeg_Pythia_P2011C.evgen.EVNT.e1736_tid01148343_00/EVNT.*' + +class EVNTMergetest(unittest.TestCase): + + def test_runEVNTMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 3) + cmd = ['EVNTMerge_tf.py', '--inputEVNTFile'] + cmd.extend(inputs) + cmd.extend(['--outputEVNT_MRGFile', 'merged.EVNT.pool.root']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 15000) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['name'], 'merged.EVNT.pool.root') + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Echo_tf.py b/Tools/PyJobTransforms/test/test_Echo_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..508a6b8a6e567b110d2e9205eef65bc4570a0471 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Echo_tf.py @@ -0,0 +1,44 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Simple test of Echo_tf.py +# $Id: test_Echo_tf.py 573315 2013-12-02 15:45:55Z graemes $ +# + +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +class Echotest(unittest.TestCase): + + def test_runEcho(self): + cmd = ['Echo_tf.py'] + cmd.extend(['--testInt', '1234']) + cmd.extend(['--testFloat', '-1.212']) + cmd.extend(['--testIntList', '1,2,3,4,5,6']) + cmd.extend(['--testSubstepList', 'all:juice', 'jane:apple', 'bob:orange', 'alice:pear']) + cmd.extend(['--testSubstepInt', 'all:34', 'jane:1', 'bob:2', 'alice:-3']) + cmd.extend(['--testSubstepBool', 'all:True', 'jane:false', 'bob:tRuE', 'alice:FaLse']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + +if __name__ == '__main__': + unittest.main()
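The substep-qualified values passed to Echo_tf.py above ('all:juice', 'jane:apple', ...) use the transforms' substep argument syntax: a value prefixed with a substep name applies only to that substep, and the 'all:' prefix supplies the default for every substep. A minimal sketch of the parsing idea (simplified; the real handling is in the substep argument classes of trfArgClasses):

def parseSubstepArgs(values):
    # 'all:juice' -> default for every substep; 'jane:apple' -> substep 'jane' only
    substepDict = {}
    for value in values:
        substep, sep, val = value.partition(':')
        if not sep:
            # A bare value counts as a default for all substeps
            substep, val = 'all', value
        substepDict[substep] = val
    return substepDict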
diff --git a/Tools/PyJobTransforms/test/test_HISTMerge_tf.py b/Tools/PyJobTransforms/test/test_HISTMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..ef55a5b173b2c22c80619e95e0af7dc32f49b7c8 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_HISTMerge_tf.py @@ -0,0 +1,45 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a HISTMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/data12_8TeV.00211670.express_express.recon.HIST.r5108_tid01383913_00/HIST.*' + +class HISTMergetest(unittest.TestCase): + + def test_runHISTMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 4) + cmd = ['HISTMerge_tf.py', '--inputHISTFile'] + cmd.extend(inputs) + cmd.extend(['--outputHIST_MRGFile', 'merged.HIST.root']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 873) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_LogFileScanner.py b/Tools/PyJobTransforms/test/test_LogFileScanner.py new file mode 100755 index 0000000000000000000000000000000000000000..9e1964ceea335fe0f2d1b54548b8a5dccbb50dde --- /dev/null +++ b/Tools/PyJobTransforms/test/test_LogFileScanner.py @@ -0,0 +1,78 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Run a few simple "Athena_tf.py" jobs and check that logfile scanning produces the correct results +# $Id: test_LogFileScanner.py 576626 2013-12-21 23:29:31Z graemes $ +# + +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +class LogscanTest(unittest.TestCase): + + def test_athenaNormalScan(self): + cmd = ['Athena_tf.py'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() +
sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + + def test_athenaManualRegexp(self): + cmd = ['Athena_tf.py', '--ignoreFiles', 'None'] + cmd.extend(['--ignorePatterns', 'An? (ERROR|FATAL) .*message']) + cmd.extend(['--reportName', 'jobReportRegexp']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_NTUPMerge_tf.py b/Tools/PyJobTransforms/test/test_NTUPMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..c70c15e036a9a0bd3f93ef5d8e00b62af08ece20 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_NTUPMerge_tf.py @@ -0,0 +1,47 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# $Id$ +# Run a NTUPMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/mc12_8TeV.107650.AlpgenJimmy_AUET2CTEQ6L1_ZeeNp0.merge.NTUP_ZPRIMEEE.e1218_s1469_s1470_r3542_r3549_p1344_tid01126179_00/NTUP*' + +class NTUPMergetest(unittest.TestCase): + + def test_runNTUPMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 3) + cmd = ['NTUPMerge_tf.py', '--inputNTUP_ZPRIMEEEFile'] + cmd.extend(inputs) + cmd.extend(['--outputNTUP_ZPRIMEEE_MRGFile', 'merged.NTUP_ZPRIMEEE.pool.root']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'],23254) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['name'], 'merged.NTUP_ZPRIMEEE.pool.root') + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_RAWMerge_tf.py b/Tools/PyJobTransforms/test/test_RAWMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..29977b091126d12101edb621b97e9ce0c394f19a --- /dev/null +++ b/Tools/PyJobTransforms/test/test_RAWMerge_tf.py @@ -0,0 +1,49 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a RAWMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/data12_8TeV.00209109.physics_Egamma.recon.DRAW_ZEE.r3970_tid00987558_00/DRAW_ZEE.*' + +class RAWMergetest(unittest.TestCase): + + def test_runRAWMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 6) + cmd = ['RAWMerge_tf.py', '--inputBSFile'] + cmd.extend(inputs) + cmd.extend(['--outputBS_MRGFile', 'merged.DRAW_ZEE._0001.data']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 21) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-05') + self.assertEqual(md['files']['output'][0]['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(md['files']['output'][0]['subFiles'][0]['name'], 'merged.DRAW_ZEE._0001.data') + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py b/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..0a036e90f9e5f8d4ce7aa58271ba8d1400015f15 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py @@ -0,0 +1,69 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a Reco job using AthenaMP and test key metadata in the output +# Note we deliberately keep the ESD as a temporary here, to check this +# works correctly (with no merging) + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict +from PyJobTransforms.trfUtils import releaseIsOlderThan + + +sourceFiles = '/afs/cern.ch/atlas/project/rig/referencefiles/dataStreams_high_mu/data12_8TeV/data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1' + +class Reco_tfAthenaMPtest(unittest.TestCase): + + def test_runReco_tf(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 1) + cmd = ['Reco_tf.py', '--inputBSFile'] + cmd.extend(inputs) + cmd.extend(['--autoConfiguration', 'everything']) + cmd.extend(['--outputESDFile', 'my.ESD.pool.root']) + cmd.extend(['--outputAODFile', 'my.AOD.pool.root']) + cmd.extend(['--outputHISTFile', 'my.HIST.root']) + cmd.extend(['--preExec', 'rec.doTrigger=False']) # This is temporary while trigger doesn't work in r19 + cmd.extend(['--outputTAGFile', 'my.TAG.pool.root']) + cmd.extend(['--maxEvents', '24',]) + cmd.append('--athenaopts=--nprocs=4') + + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + dataDict = pyJobReportToFileDict(md) + self.assertTrue('ESD' in dataDict.keys()) + self.assertTrue('AOD' in dataDict.keys()) + self.assertTrue('HIST' in dataDict.keys()) + self.assertEqual(dataDict['ESD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(dataDict['ESD']['subFiles'][0]['name'], 'my.ESD.pool.root') + self.assertEqual(dataDict['AOD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict['AOD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') + self.assertEqual(dataDict['AOD']['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(dataDict['AOD']['subFiles'][0]['name'], 'my.AOD.pool.root') + self.assertEqual(dataDict['HIST']['subFiles'][0]['nentries'], 24) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Reco_EOS_tf.py b/Tools/PyJobTransforms/test/test_Reco_EOS_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..ed12a6b2d3de6878e7448a4223b8a04bd77b5770 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Reco_EOS_tf.py @@ -0,0 +1,57 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Run a Reco job and test key metadata in the output, sources on EOS +# +# This test only makes ESD, as that is sufficient to test the bytestream +# access +# $Id: test_Reco_EOS_tf.py 604373 2014-07-01 09:32:30Z graemes $ +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict + +sourceFile = 'root://eosatlas//eos/atlas/user/g/graemes/data12_8TeV.00211620.physics_ZeroBias.merge.RAW/data12_8TeV.00211620.physics_ZeroBias.merge.RAW._lb0916._SFO-ALL._0001.1' + +class Reco_tftest(unittest.TestCase): + + def test_runReco_tf(self): + cmd = ['Reco_tf.py', '--inputBSFile'] + cmd.append(sourceFile) + cmd.extend(['--outputESDFile', 'my.ESD.pool.root', '--autoConfiguration', 'everything']) + cmd.extend(['--maxEvents', '10']) + cmd.extend(['--preExec', 'rec.doTrigger=False']) # This is temporary while trigger doesn't work in r19 + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + dataDict = pyJobReportToFileDict(md) + self.assertTrue('ESD' in dataDict.keys()) + self.assertEqual(dataDict['ESD']['subFiles'][0]['nentries'], 10) + self.assertEqual(dataDict['ESD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(dataDict['ESD']['subFiles'][0]['name'], 'my.ESD.pool.root') + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Reco_Tier0_tf.py b/Tools/PyJobTransforms/test/test_Reco_Tier0_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..169a8efabe3c53141b446f4fdaa787f632334bf7 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Reco_Tier0_tf.py @@ -0,0 +1,72 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Run a Reco job twice - first dumping the job configuration to a +# pickle file, then configuring the second job from that pickle - and +# test key metadata in the output +# $Id: test_Reco_EOS_tf.py 573368 2013-12-02 18:56:04Z graemes $ +# + +import glob +import json +import subprocess +import os +import os.path +import cPickle as pickle +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict + +sourceFile = 'root://eosatlas//eos/atlas/user/g/graemes/data12_8TeV.00211620.physics_ZeroBias.merge.RAW/data12_8TeV.00211620.physics_ZeroBias.merge.RAW._lb0916._SFO-ALL._0001.1' + +class RecoTier0test(unittest.TestCase): + + def test_Reco_Tier0_tf(self): + pFile = 'job.pkl' + + cmd = "Reco_tf.py --inputBSFile /afs/cern.ch/atlas/project/rig/referencefiles/dataStreams_high_mu/data12_8TeV/data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1 --maxEvents 5 --autoConfiguration everything --preExec 'rec.doDetailedAuditor=True' 'rec.doNameAuditor=True' 'rec.doCalo=False' 'rec.doInDet=False' 'rec.doMuon=False' 'rec.doJetMissingETTag=False' 'rec.doEgamma=False' 'rec.doMuonCombined=False' 'rec.doTau=False' 'rec.doTrigger=False' --outputESDFile myESD.pool.root --dumpPickle {0}".format(pFile).split() + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Check the pickle was produced + self.assertEqual(os.access(pFile, os.R_OK), True) + unpickleFile = open(pFile, 'r') + contents = pickle.load(unpickleFile) + unpickleFile.close() + self.assertEqual(isinstance(contents, dict), True) + + # Now configure and run the transform from the pickle file + cmd = "Reco_tf.py --argdict {0} --outputESDFile newESD.pool.root".format(pFile).split() + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + dataDict = pyJobReportToFileDict(md) + self.assertTrue('ESD' in dataDict.keys()) + self.assertEqual(dataDict['ESD']['subFiles'][0]['nentries'], 5) + self.assertEqual(dataDict['ESD']['subFiles'][0]['name'], 'newESD.pool.root') + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Reco_tf.py b/Tools/PyJobTransforms/test/test_Reco_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..54858c82e6bf4153dce9e3422afa4212b99941d4 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Reco_tf.py @@ -0,0 +1,69 @@ +#!
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a Reco job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict + +sourceFiles = '/afs/cern.ch/atlas/project/rig/referencefiles/dataStreams_high_mu/data12_8TeV/data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1' + +class Reco_tftest(unittest.TestCase): + + def test_runReco_tf(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 1) + cmd = ['Reco_tf.py', '--inputBSFile'] + cmd.extend(inputs) + cmd.extend(['--outputESDFile', 'my.ESD.pool.root', '--autoConfiguration', 'everything']) + cmd.extend(['--outputAODFile', 'my.AOD.pool.root']) + cmd.extend(['--outputHISTFile', 'my.HIST.root']) + cmd.extend(['--outputTAGFile', 'my.TAG.pool.root']) + cmd.extend(['--maxEvents', '10']) + cmd.extend(['--preExec', 'rec.doTrigger=False']) # This is temporary while trigger doesn't work in r19 + ## Event counting currently broken for multi-step transforms + cmd.extend(['--checkEventCount', 'true']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + dataDict = pyJobReportToFileDict(md) + self.assertTrue('ESD' in dataDict.keys()) + self.assertTrue('AOD' in dataDict.keys()) + self.assertTrue('HIST' in dataDict.keys()) + self.assertTrue('TAG' in dataDict.keys()) + self.assertEqual(dataDict['ESD']['subFiles'][0]['nentries'], 10) + self.assertEqual(dataDict['ESD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') + self.assertEqual(dataDict['ESD']['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(dataDict['ESD']['subFiles'][0]['name'], 'my.ESD.pool.root') + self.assertEqual(dataDict['AOD']['subFiles'][0]['nentries'], 10) + self.assertEqual(dataDict['AOD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') + self.assertEqual(dataDict['AOD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') + self.assertEqual(dataDict['AOD']['subFiles'][0]['beam_type'], ['collisions']) + self.assertEqual(dataDict['AOD']['subFiles'][0]['name'], 'my.AOD.pool.root') + self.assertEqual(dataDict['HIST']['subFiles'][0]['nentries'], 10) + self.assertEqual(dataDict['TAG']['subFiles'][0]['nentries'], 10) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_Sleep_tf.py b/Tools/PyJobTransforms/test/test_Sleep_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..96594f2ba99e7a8d4fd0fe8c24242e3b4ae21a67 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Sleep_tf.py @@ -0,0 +1,38 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Simple test of Sleep_tf.py +# $Id: test_Sleep_tf.py 573333 2013-12-02 16:20:46Z graemes $ +# + +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +class Sleeptest(unittest.TestCase): + + def test_runSleep(self): + cmd = ['Sleep_tf.py', '--dust', '10'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_TAGMerge_tf.py b/Tools/PyJobTransforms/test/test_TAGMerge_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..bb2a3ca5ff2aa98306fc30c2ceb7e3da7ae128d5 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_TAGMerge_tf.py @@ -0,0 +1,46 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a TAGMerge job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg + +sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/data12_8TeV.00207865.physics_JetTauEtmiss.merge.TAG.r4065_p1278_tid01030417_00/TAG.*' + +class TAGMergetest(unittest.TestCase): + + def test_runTAGMerge(self): + inputs = glob.glob(sourceFiles) + self.assertEqual(len(inputs), 2) + cmd = ['TAGMerge_tf.py', '--inputTAGFile'] + cmd.extend(inputs) + cmd.extend(['--outputTAG_MRGFile', 'merged.TAG.pool.root']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['nentries'], 78232) + self.assertEqual(md['files']['output'][0]['subFiles'][0]['beam_type'], []) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_transform.py b/Tools/PyJobTransforms/test/test_transform.py new file mode 100755 index 0000000000000000000000000000000000000000..27150d4637d44819b9473249a911e6d86ca6a87f --- /dev/null +++ b/Tools/PyJobTransforms/test/test_transform.py @@ -0,0 +1,29 @@ +#!
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_transform.py +# @brief Unittests for transform.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_transform.py 529035 2012-12-05 15:45:24Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.transform import * + +# Unittests for this module +class transformsTests(unittest.TestCase): + + def test_transformSetup(self): + # Test we can get a transform + from PyJobTransforms.trfExe import echoExecutor + tf = transform(executor = echoExecutor()) + self.assertTrue(isinstance(tf, transform)) + +if __name__ == '__main__': + unittest.main() + diff --git a/Tools/PyJobTransforms/test/test_trfAMI.py b/Tools/PyJobTransforms/test/test_trfAMI.py new file mode 100755 index 0000000000000000000000000000000000000000..9941ca91b9639f60b8dd57c36e2f8d39d38a4125 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfAMI.py @@ -0,0 +1,82 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfAMI.py +# @brief Unittests for trfAMI.py +# @author bjorn.sarrazin@cern.ch +# @version $Id: test_trfAMI.py 590840 2014-04-02 12:22:48Z graemes $ + +import unittest + + +import logging +msg = logging.getLogger(__name__) + +from PyJobTransforms.trfAMI import TagInfo, TrfConfig +from PyJobTransforms.trfArgs import addAthenaArguments + +## Unittests for trfAMI module +class trfAMIUnitTests(unittest.TestCase): + + # test T0 tag + def test_info_q120(self): + physics={'AMITag':'q120', + 'maxEvents': '-1', + 'autoConfiguration':'everything', + 'preExec':'rec.doFloatingPointException.set_Value_and_Lock(True)'} + + tag=TagInfo('q120') + self.assertTrue(isinstance(tag.trfs[0], TrfConfig)) + self.assertEqual(tag.isProdSys, False) + self.assertEqual(tag.trfs[0].name, 'Reco_trf.py') + self.assertEqual(tag.trfs[0].release, '17.2.0.3') + self.assertEqual(tag.trfs[0].physics, physics) + + # test multiple transforms tag from ProdSys + def test_info_a180(self): + physics={'conditionsTag':'OFLCOND-MC12-SIM-00', + 'postInclude':'FastCaloSimHit/postInclude.AF2FilterHitItems.py FastSimulationJobTransforms/jobConfig.FastCaloSim_ID_cuts.py FastSimulationJobTransforms/jobConfig.egamma_lateral_shape_tuning.config20.py', + 'DBRelease':'20.2.2', + 'preInclude':'FastSimulationJobTransforms/jobConfig.v14_Parametrisation.py FastCaloSimHit/preInclude.AF2Hit.py', + 'geometryVersion':'ATLAS-GEO-20-00-01'} + + + tag=TagInfo('a180') + self.assertEqual(len(tag.trfs),2) + self.assertTrue(isinstance(tag.trfs[0], TrfConfig)) + self.assertTrue(isinstance(tag.trfs[1], TrfConfig)) + self.assertEqual(tag.isProdSys, True) + self.assertEqual(tag.trfs[0].name, 'Merging_trf.py') + self.assertEqual(tag.trfs[0].release, '17.2.1.4.2,TrigMC') + self.assertEqual(tag.trfs[0].physics, physics) + self.assertEqual(tag.trfs[1].name, 'DigiMReco_trf.py') + self.assertEqual(tag.trfs[1].release, '17.2.1.4.2,TrigMC') + + # test setup of transform + def test_transform(self): + from PyJobTransforms.transform import transform + from PyJobTransforms.trfArgClasses import argFactory, argString + from PyJobTransforms.trfDefaultFiles import getInputFileName + + tf=transform() + addAthenaArguments(tf.parser) + tf.parser.add_argument('--inputBSFile', type=argFactory(argString)) + tf.parser.add_argument('--outputBSFile', type=argFactory(argString)) + 
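# extraArg is not defined by the AMI tag; the asserts below use it to check + # that ordinary command-line arguments survive --AMIConfig expansion +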
tf.parser.add_argument('--extraArg', type=argFactory(argString)) + + tf.parseCmdLineArgs('--AMIConfig=p1346 --extraArg=special --outputBSFile=myOutput'.split()) + + # not part of tag + self.assertEqual(tf.argdict['extraArg'].value, 'special') + # part of tag + self.assertEqual(tf.argdict['inputBSFile'].value, getInputFileName('inputBSFile')) + # part of tag but overwritten at command line + self.assertEqual(tf.argdict['outputBSFile'].value, 'myOutput') + + +if __name__ == '__main__': + unittest.main() + + + diff --git a/Tools/PyJobTransforms/test/test_trfArgClasses.py b/Tools/PyJobTransforms/test/test_trfArgClasses.py new file mode 100755 index 0000000000000000000000000000000000000000..d644f56e8b00bdf8c44b4f555d61ec3e7dbfabe2 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfArgClasses.py @@ -0,0 +1,554 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfArgClasses.py +# @brief Unittests for test_trfArgClasses.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfArgClasses.py 604265 2014-06-30 14:53:32Z graemes $ + +import os +import sys +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfArgClasses import * + +## Unittests for this module +class trfArgumentTests(unittest.TestCase): + + def setUp(self): + self.message = 'a string' + + self.a = argument() + self.b = argument() + self.c = argument(self.message) + self.d = argument(self.message, runarg = False) + + ## Test basic properties of arguments + def test_argumentInitialValue(self): + self.assertEqual(self.a.value, None) + self.assertEqual(self.c.value, self.message) + + def test_argumentCmp(self): + self.assertEqual(self.a, self.b) + self.assertNotEqual(self.a, self.c) + + def test_argumentSetValue(self): + self.a.value = self.message + self.assertEqual(self.a.value, self.message) + self.assertEqual(self.a, self.c) + + def test_argumentRunarg(self): + self.assertEqual(self.a.isRunarg, True) + self.assertEqual(self.d.isRunarg, False) + +class trfArgStringTests(unittest.TestCase): + + def setUp(self): + self.message = 'Flibbertigibbet' + self.aFloat = 1922 + + self.a = argString() + self.b = argString(self.message) + self.c = argString(self.aFloat) + self.d = argString(self.message, runarg = False) + self.veg = argString('potato', choices = ['potato', 'carrot', 'broccoli', 'onion'], runarg = True) + + ## Test basic properties of arguments + def test_argStringInitialValue(self): + self.assertEqual(self.a.value, '') + self.assertEqual(self.b.value, self.message) + self.assertEqual(self.c.value, str(self.aFloat)) + + def test_argStringCmp(self): + self.assertEqual(self.b, self.d) + self.assertNotEqual(self.b, self.c) + + def test_argStringSetValue(self): + self.a.value = self.message + self.assertEqual(self.a.value, self.message) + self.assertEqual(self.a, self.b) + + def test_argStringRunarg(self): + self.assertEqual(self.a.isRunarg, True) + self.assertEqual(self.d.isRunarg, False) + + def test_argStringprodsysDescription(self): + self.assertEqual(self.a.prodsysDescription['type'],'string') + + def test_argStringChoices(self): + self.assertEqual(self.veg.value, 'potato') + self.veg.value = 'onion' + self.assertEqual(self.veg.value, 'onion') + + def test_argStringChoicesBad(self): + self.assertRaises(trfExceptions.TransformArgException, argString, 'orange', + choices = ['potato', 'carrot', 'broccoli', 'onion']) + +class 
trfArgIntTests(unittest.TestCase): + + def setUp(self): + self.anInt1 = 1968 + self.anInt2 = -1066 + self.aValidIntString = '45' + + self.a = argInt() + self.b = argInt(self.anInt1) + self.c = argInt(self.anInt2) + self.d = argInt(self.anInt1, runarg = False) + self.e = argInt(self.aValidIntString) + + ## Test basic properties of arguments + def test_argIntInitialValue(self): + self.assertEqual(self.a.value, 0) + self.assertEqual(self.b.value, self.anInt1) + self.assertEqual(self.c.value, self.anInt2) + self.assertEqual(self.e.value, int(self.aValidIntString)) + + def test_argIntCmp(self): + self.assertEqual(self.b, self.d) + self.assertNotEqual(self.b, self.c) + + def test_argIntSetValue(self): + self.a.value = self.anInt1 + self.assertEqual(self.a.value, self.anInt1) + self.assertEqual(self.a, self.b) + + def test_argIntRunarg(self): + self.assertEqual(self.a.isRunarg, True) + self.assertEqual(self.d.isRunarg, False) + + def test_argIntSetBadValue(self): + self.assertRaises(trfExceptions.TransformArgException, argInt, 'rubbish') + self.assertRaises(trfExceptions.TransformArgException, argInt, '1.141') + + def test_argIntprodsysDescription(self): + self.assertEqual(self.a.prodsysDescription['type'],'INT') + +class trfArgBoolTests(unittest.TestCase): + + def setUp(self): + self.a = argBool() + self.b = argBool(True) + self.c = argBool(False, runarg = False) + self.d = argBool(True, runarg = False) + + ## Test basic properties of arguments + def test_argBoolInitialValue(self): + self.assertEqual(self.a.value, False) + self.assertEqual(self.b.value, True) + self.assertEqual(self.c.value, False) + + def test_argBoolCmp(self): + self.assertEqual(self.b, self.d) + self.assertNotEqual(self.b, self.c) + + def test_argBoolSetValue(self): + self.a.value = True + self.assertEqual(self.a.value, True) + self.assertEqual(self.a, self.b) + + def test_argBoolRunarg(self): + self.assertEqual(self.a.isRunarg, True) + self.assertEqual(self.d.isRunarg, False) + + def test_argBoolprodsysDescription(self): + self.assertEqual(self.a.prodsysDescription['type'],'bool') + +class trfArgFloatTests(unittest.TestCase): + + def setUp(self): + self.aFloat1 = 1922.67 + self.aFloat2 = -1066.45 + self.aValidFloatString = '3.14' + + self.a = argFloat() + self.b = argFloat(self.aFloat1) + self.c = argFloat(self.aFloat2) + self.d = argFloat(self.aFloat1, runarg = False) + self.e = argFloat(self.aValidFloatString) + self.f = argFloat(self.aFloat1, min = 1000, max = 2000) + + ## Test basic properties of arguments + def test_argFloatInitialValue(self): + self.assertEqual(self.a.value, 0.0) + self.assertEqual(self.b.value, self.aFloat1) + self.assertEqual(self.c.value, self.aFloat2) + self.assertEqual(self.e.value, float(self.aValidFloatString)) + + def test_argFloatCmp(self): + self.assertEqual(self.b, self.d) + self.assertNotEqual(self.b, self.c) + + def test_argFloatSetValue(self): + self.a.value = self.aFloat1 + self.assertEqual(self.a.value, self.aFloat1) + self.assertEqual(self.a, self.b) + + def test_argFloatRunarg(self): + self.assertEqual(self.a.isRunarg, True) + self.assertEqual(self.d.isRunarg, False) + + def test_argFloatSetBadValue(self): + self.assertRaises(trfExceptions.TransformArgException, argFloat, 'rubbish') + + def test_argFloatMinMax(self): + # This code does not work until python2.7 +# with self.assertRaises(trfExceptions.TransformArgException): +# self.f.value = 2100 +# with self.assertRaises(trfExceptions.TransformArgException): +# self.f.value = 900 + 
self.assertRaises(trfExceptions.TransformArgException, argFloat, value = 67, max = 60) + self.assertRaises(trfExceptions.TransformArgException, argFloat, value = 67, min = 100) + + def test_argFloatprodsysDescription(self): + self.assertEqual(self.f.prodsysDescription['type'],'float') + self.assertEqual(self.f.prodsysDescription['min'],self.f._min) + self.assertEqual(self.f.prodsysDescription['max'],self.f._max) + +class argListTests(unittest.TestCase): + def test_basicList(self): + lst = argList('bob,sally,jack') + self.assertEqual(len(lst.value), 3) + + def test_emptySupression(self): + l_nosup = argList('bob,,sally,jack', supressEmptyStrings = False) + l_sup = argList('bob,,sally,jack', supressEmptyStrings = True) + self.assertEqual(len(l_sup.value), 3) + self.assertEqual(len(l_nosup.value), 4) + + def test_emptyList(self): + lst = argList('', supressEmptyStrings = True) + self.assertEqual(lst.value, []) + + +class argIntListTests(unittest.TestCase): + def test_basicIntList(self): + lst = argIntList('56,89,-100') + self.assertEqual(len(lst.value), 3) + self.assertEqual(lst.value[2], -100) + + def test_emptyIntListSupression(self): + lst = argIntList('23,,-50,99', supressEmptyStrings = True) + self.assertEqual(len(lst.value), 3) + + def test_emptyIntList(self): + lst = argIntList('', supressEmptyStrings = True) + self.assertEqual(lst.value, []) + + def test_illegalIntList(self): + self.assertRaises(trfExceptions.TransformArgException, argIntList, value = "notAnInt") + + +class argKeyFloatValueListTests(unittest.TestCase): + def test_basicKFVList(self): + lst = argKeyFloatValueList('ESD:123.6,AOD:456.1,TAG:0,HIST:-1') + self.assertEqual(len(lst.value), 4) + self.assertEqual(lst.value['ESD'], 123.6) + self.assertEqual(lst.value['AOD'], 456.1) + self.assertEqual(lst.value['TAG'], 0) + self.assertEqual(lst.value['HIST'], -1) + + def test_emptyKFVSupression(self): + lst = argKeyFloatValueList('ESD:123,AOD:456,TAG:0,,HIST:-1', supressEmptyStrings = True) + self.assertEqual(len(lst.value), 4) + self.assertEqual(lst.value['ESD'], 123) + self.assertEqual(lst.value['AOD'], 456) + self.assertEqual(lst.value['TAG'], 0) + self.assertEqual(lst.value['HIST'], -1) + + def test_emptyKFVList(self): + lst = argKeyFloatValueList('', supressEmptyStrings = True) + self.assertEqual(lst.value, {}) + + def test_illegalKFVList(self): + self.assertRaises(trfExceptions.TransformArgException, argKeyFloatValueList, value = "foo:bar") + self.assertRaises(trfExceptions.TransformArgException, argKeyFloatValueList, value = "foobar") + + +class argPickleDictTests(unittest.TestCase): + # Write me! 
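+ ## Placeholder sketch until real dict-pickling tests are written (hedged: + ## assumes argument objects pickle cleanly via their plain attribute state) + def test_pickleRoundTrip(self): + import pickle + a = argInt(42) + b = pickle.loads(pickle.dumps(a)) + self.assertEqual(a.value, b.value)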
+ pass + + +class argSubstepTests(unittest.TestCase): + def test_substepBasic(self): + ss = argSubstep('free biscuits') + self.assertEqual(ss.value, {'all': 'free biscuits'}) + + def test_substepBasicAll(self): + ss = argSubstep('all:free biscuits') + self.assertEqual(ss.value, {'all': 'free biscuits'}) + + def test_substepFromList(self): + ss = argSubstep(['free biscuits', 'fruit:apple', 'drink:lemonade']) + self.assertEqual(ss.value, {'all': 'free biscuits', 'fruit': 'apple', 'drink': 'lemonade'}) + + def test_substepFromDict(self): + d = {'all': 'foo', 'fruit': 'bar', 'drink': 'baz'} + ss = argSubstep(d) + self.assertEqual(ss.value, d) + + def test_substepFloat(self): + ss = argSubstepFloat("1.2") + self.assertEqual(ss.value, {'all': 1.2}) + + def test_substepFloatMinMax(self): + ss = argSubstepFloat("1.2",min=1,max=2) + self.assertEqual(ss.value, {'all': 1.2}) + + def test_substepFloatMinMaxBroken(self): + self.assertRaises(trfExceptions.TransformArgException, argSubstepFloat, "1.2", min=0., max=1. ) + + def test_substepInt(self): + ss = argSubstepInt("4711") + self.assertEqual(ss.value, {'all': 4711}) + + def test_substepEqualsSeparator(self): + ss = argSubstep("free=juice", separator='=') + self.assertEqual(ss.value, {"free": "juice"}) + + +class argSubstepListTests(unittest.TestCase): + def test_substepBasic(self): + ss = argSubstepList('free biscuits') + self.assertEqual(ss.value, {'all': ['free biscuits']}) + + def test_substepBasicAll(self): + ss = argSubstepList('all:free biscuits') + self.assertEqual(ss.value, {'all': ['free biscuits']}) + + def test_substepList(self): + ss = argSubstepList(['all:free biscuits', 'fruit:apple', 'fruit:pear', 'drink:lemonade', 'first:lay table', 'default:mince']) + self.assertEqual(ss.value, {'all': ['free biscuits'], 'fruit': ['apple', 'pear'], 'drink': ['lemonade'], + 'first': ['lay table'], 'default': ['mince']}) + self.assertEqual(ss.returnMyValue(name='fruit'), ['free biscuits', 'apple', 'pear']) + self.assertEqual(ss.returnMyValue(name='entree'), ['free biscuits', 'mince']) + self.assertEqual(ss.returnMyValue(name='entree', first=True), ['free biscuits', 'lay table']) + + def test_substepListAll(self): + ss = argSubstepList(['all:free biscuits', 'fruit:apple', 'fruit:pear', 'drink:lemonade', 'free beer']) + self.assertEqual(ss.value, {'all': ['free biscuits', 'free beer'], 'fruit': ['apple', 'pear'], 'drink': ['lemonade']}) + + def test_substepListSplitter(self): + ss = argSubstepList(['all:free,biscuits', 'fruit:apple', 'fruit:pear', 'drink:lemonade', 'first:lay,table', 'default:mince'], splitter=',') + self.assertEqual(ss.value, {'all': ['free', 'biscuits'], 'fruit': ['apple', 'pear'], 'drink': ['lemonade'], + 'first': ['lay', 'table'], 'default': ['mince']}) + self.assertEqual(ss.returnMyValue(name='fruit'), ['free', 'biscuits', 'apple', 'pear']) + self.assertEqual(ss.returnMyValue(name='entree'), ['free', 'biscuits', 'mince']) + self.assertEqual(ss.returnMyValue(name='entree', first=True), ['free', 'biscuits', 'lay', 'table']) + + def test_substepFromDict(self): + d = {'all': ['free biscuits', 'will come'], 'fruit': ['apple', 'pear'], 'drink': ['dishwater']} + ss = argSubstepList(d) + self.assertEqual(ss.value, d) + + def test_substepBroken(self): + self.assertRaises(trfExceptions.TransformArgException, argSubstepList, value={'key' : 'value'}) + + +class argSteeringTests(unittest.TestCase): + def test_basicStrSet(self): + steer = argSubstepSteering('RAWtoESD:out-RDO') + self.assertEqual(len(steer.value), 1) + 
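# Each steering value parses to a (direction, operator, dataType) tuple; + # going by these fixtures, '-' drops the type and '+' adds it for the substep +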
self.assertEqual(steer.value, {'RAWtoESD': [('out', '-', 'RDO')]}) + + def test_multiStrSet(self): + steer = argSubstepSteering('RAWtoESD:out-RDO,out+RDO_TRIG') + self.assertEqual(len(steer.value), 1) + self.assertEqual(steer.value, {'RAWtoESD': [('out', '-', 'RDO'), ('out', '+', 'RDO_TRIG')]}) + + def test_multiStrSet2(self): + steer = argSubstepSteering(['RAWtoESD:out-RDO,out+RDO_TRIG', 'ESDtoAOD:in+JUNK']) + self.assertEqual(len(steer.value), 2) + self.assertEqual(steer.value, {'RAWtoESD': [('out', '-', 'RDO'), ('out', '+', 'RDO_TRIG')], 'ESDtoAOD': [('in', '+', 'JUNK')]}) + + +class argFileTests(unittest.TestCase): + def setUp(self): + # In python 2.7 support for multiple 'with' expressions becomes available + with open('file1', 'w') as f1: + print >>f1, 'This is test file 1' + with open('file2', 'w') as f2: + print >>f2, 'Short file 2' + with open('file3', 'w') as f3: + print >>f3, 80*'-', 'Long file 3', 80*'-' + with open('prefix.prodsysfile._001.suffix.1', 'w') as f1: + print >>f1, 'This is prodsys test file 1' + with open('prefix.prodsysfile._002.suffix.4', 'w') as f2: + print >>f2, 'Short prodsys file 2' + with open('prefix.prodsysfile._003.suffix.7', 'w') as f3: + print >>f3, 80*'-', 'Long prodsys file 3', 80*'-' + + self.mySingleFile = argFile(['file1'], io='output') + self.myMultiFile = argFile(['file1', 'file2', 'file3'], io='input') + self.myManualGUIDSingleFile = argFile(['file1'], io='output', guid={'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B'}) + self.myManualGUIDMultiFile = argFile(['file1', 'file2', 'file3'], io='input', guid={'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'file2': '1368D295-27C6-4A92-8187-704C2A6A5864', 'file3': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9'}) + + def tearDown(self): + for f in ('file1', 'file2', 'file3', 'prefix.prodsysfile._001.suffix.1', 'prefix.prodsysfile._002.suffix.4', + 'prefix.prodsysfile._003.suffix.7'): + try: + os.unlink(f) + except OSError: + pass + + def test_argFileInitialValue(self): + self.assertEqual(self.mySingleFile.value, ['file1',]) + self.assertEqual(self.myMultiFile.value, ['file1', 'file2', 'file3']) + + def test_athFileValueSetter(self): + self.mySingleFile.value = 'theAmazingFile' + self.assertEqual(self.mySingleFile.value, ['theAmazingFile',]) + self.myMultiFile.value = ('file3', 'file2') + self.assertEqual(self.myMultiFile.value, ['file3', 'file2']) + # Next test fails because input files glob to nothing + self.assertRaises(trfExceptions.TransformArgException, self.mySingleFile.__init__, value=['fileXXX', 'fileYYY']) + + def test_argFileGlob(self): + myInput = argFile('file?', io='input') + # Use set comparison as glob order is not guaranteed + self.assertEqual(set(myInput.value), set(['file1', 'file2', 'file3'])) + + def test_argFileProdsysGlob(self): + myInput = argFile('prefix.prodsysfile._[001,002,003].suffix', io='input') + self.assertEqual(myInput.value, ['prefix.prodsysfile._001.suffix.1', 'prefix.prodsysfile._002.suffix.4', + 'prefix.prodsysfile._003.suffix.7']) + + def test_argFileIO(self): + self.assertEqual(self.mySingleFile.io, 'output') + self.assertEqual(self.myMultiFile.io, 'input') + + def test_argFileDataset(self): + withDataset = argFile('fakeDatasetName#file1') + self.assertEqual(withDataset.dataset, 'fakeDatasetName') + + def test_argFileMetadata(self): + # Can't test all metadata directly now we added a GUID generator + self.assertTrue(cmpMetadata(self.mySingleFile.getMetadata(), {'file1': {'_exists': True, 'file_guid': 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094', 'integrity': True, 'file_size': 
20}})) + self.assertTrue(cmpMetadata(self.myMultiFile.getMetadata(),{'file3': {'_exists': True, 'file_guid': 'CAB26113-8CEC-405A-BEDB-9B1CFDD96DA8', 'integrity': True, 'file_size': 174}, 'file2': {'_exists': True, 'file_guid': '23310685-BBC3-4385-9870-1622D15B4C2D', 'integrity': True, 'file_size': 13}, 'file1': {'_exists': True, 'file_guid': '9E3EDD48-978E-4441-97B9-F3FF4241DE80', 'integrity': True, 'file_size': 20}})) + + def test_argFileMetadataWithGUID(self): + self.assertEqual(self.myManualGUIDSingleFile.getMetadata(), {'file1': {'_exists': True, 'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'integrity': True, 'file_size': 20}}) + self.assertEqual(self.myManualGUIDMultiFile.getMetadata(), {'file3': {'_exists': True, 'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'integrity': True, 'file_size': 174}, 'file2': {'_exists': True, 'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', 'integrity': True, 'file_size': 13}, 'file1': {'_exists': True, 'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'integrity': True, 'file_size': 20}}) + + def test_argFileprodsysDescription(self): + self.assertEqual(self.mySingleFile.prodsysDescription['type'],'file') + + def test_argFileDuplicateRemoval(self): + self.myMultiFile.value = ('file1', 'file1', 'file3', 'file2', 'file3', 'file1') + self.assertEqual(self.myMultiFile.value, ['file1', 'file3', 'file2']) # Base order preserved + + ## @brief Test isCached() method + def test_argFileMetadataCache(self): + self.assertFalse(self.myMultiFile.isCached(metadataKeys = ['file_size'])) + self.myMultiFile.getMetadata(metadataKeys = ['file_size']) + self.assertTrue(self.myMultiFile.isCached(metadataKeys = ['file_size'])) + + ## @brief Test that we can manually set metadata items + def test_argFileSetMetadata(self): + self.myMultiFile._setMetadata(files=None, metadataKeys={'file_size': 1234567, '_exists': True}) + self.assertEqual(self.myMultiFile.getSingleMetadata('file1', 'file_size'), 1234567) + +class argFileEOSTests(unittest.TestCase): + def test_SimExpansion(self): + hitsInputs = argFile('root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._000[029,200].pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._000[614,817].pool.root.5', io='input') + self.assertEqual(len(hitsInputs.value), 4) + + def test_SimGlobStar(self): + hitsInputs = argFile('root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.*', io='input') + self.assertEqual(len(hitsInputs.value), 4) + + def test_SimGlobMatchSingle(self): + hitsInputs = argFile('root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._??????.pool.root.?', io='input') + self.assertEqual(len(hitsInputs.value), 4) + + def test_SimGlobMatchBoth(self): + hitsInputs = argFile('root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._*.pool.root.?', io='input') + self.assertEqual(len(hitsInputs.value), 4) + + +class argPOOLFiles(unittest.TestCase): + def tearDown(self): + for f in 
'athfile-cache.ascii.gz', 'athfile-infos.ascii': + try: + os.unlink(f) + except OSError: + pass + + def test_argPOOLFileMetadata_ESD(self): + try: + testFile = '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root' + os.stat(testFile) + esdFile = argPOOLFile(testFile, io = 'input', type='esd') + self.assertEqual(esdFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'_exists': True, 'run_number': [182796L], 'beam_energy': [3500000.0], 'file_type': 'pool', 'AODFixVersion': '', 'file_size': 17033381L, 'geometry': 'ATLAS-GEO-16-00-01', 'file_guid': '0CABA22E-9096-E011-AE25-0030487C8CE6', 'beam_type': ['collisions'], 'lumi_block': [300L], 'conditions_tag': 'COMCOND-BLKPST-004-00', 'integrity': True, 'nentries': 10L}}) + self.assertEqual(esdFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'nentries': 10}}) + self.assertEqual(esdFile.prodsysDescription['type'],'file') + except OSError: + # With python 2.7 this should call the self.skip() method + print >>sys.stderr, 'WARNING Skipping test_argPOOLFileMetadata_ESD - stat on AFS test file failed' + + + + + def test_argPOOLFileMetadata_AOD(self): + try: + testFile = '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root' + os.stat(testFile) + aodFile = argPOOLFile(testFile, io = 'input', type='aod') + self.assertEqual(aodFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'_exists': True, 'run_number': [182796L], 'beam_energy': [3500000.0], 'file_type': 'pool', 'AODFixVersion': '', 'file_size': 4673269L, 'geometry': 'ATLAS-GEO-16-00-01', 'file_guid': '6E1FE6F0-9096-E011-9DDA-0030487C8CE6', 'beam_type': ['collisions'], 'lumi_block': [300L], 'conditions_tag': 'COMCOND-BLKPST-004-00', 'integrity': True, 'nentries': 10L}}) + self.assertEqual(aodFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'nentries': 10}}) + self.assertEqual(aodFile.prodsysDescription['type'],'file') + self.assertTrue(aodFile.prodsysDescription['subtype']=='AOD') + except OSError: + # With python 2.7 this should call the self.skip() method + print >>sys.stderr, 'WARNING Skipping test_argPOOLFileMetadata_AOD - stat on AFS test file failed' + + + +class argBSFiles(unittest.TestCase): + def tearDown(self): + for f in 'athfile-cache.ascii.gz', 'athfile-infos.ascii': + try: + os.unlink(f) + except OSError: + pass + + def test_argBSFileMetadata(self): + try: + testFile = '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts' + os.stat(testFile) + rawFile = argBSFile(testFile, io = 'input', type='bs') + self.assertEqual(rawFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts': {'_exists': True, 'run_number': [191920], 'beam_energy': [0], 'file_type': 'bs', 'AODFixVersion': '', 'file_size': 12998048L, 'geometry': None, 'file_guid': '4A511034-3A53-E111-8745-003048F0E7AE', 'beam_type': ['collisions'], 
'lumi_block': [257], 'conditions_tag': None, 'integrity': True, 'nentries': 10}}) + self.assertEqual(rawFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts': {'nentries': 10}}) + + self.assertEqual(rawFile.prodsysDescription['type'],'file') + except OSError: + # With python 2.7 this should call the self.skip() method + print >>sys.stderr, 'WARNING Skipping test_argAthenaFileMetadata - stat on AFS test file failed' + + def test_argBSMultiFileMetadata(self): + try: + testFiles = ['/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts', + '/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data', + '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.RAW._lb0300._SFO-10._0001.1.10evts.data', + ] + for file in testFiles: + os.stat(file) + rawFile = argBSFile(testFiles, io = 'input', type = 'bs') + self.assertEqual(rawFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data': {'_exists': True, 'run_number': [182609], 'beam_energy': [0], 'file_type': 'bs', 'AODFixVersion': '', 'file_size': 10487108L, 'geometry': None, 'file_guid': 'E8B69378-EF91-E011-A6F0-003048CAD388', 'beam_type': ['cosmics'], 'lumi_block': [100], 'conditions_tag': None, 'integrity': True, 'nentries': 10}, '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts': {'_exists': True, 'run_number': [191920], 'beam_energy': [0], 'file_type': 'bs', 'AODFixVersion': '', 'file_size': 12998048L, 'geometry': None, 'file_guid': '4A511034-3A53-E111-8745-003048F0E7AE', 'beam_type': ['collisions'], 'lumi_block': [257], 'conditions_tag': None, 'integrity': True, 'nentries': 10}, '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.RAW._lb0300._SFO-10._0001.1.10evts.data': {'_exists': True, 'run_number': [182796], 'beam_energy': [3500], 'file_type': 'bs', 'AODFixVersion': '', 'file_size': 12946296L, 'geometry': None, 'file_guid': 'FABAAD37-B38E-E011-8C1D-003048CAD384', 'beam_type': ['collisions'], 'lumi_block': [300], 'conditions_tag': None, 'integrity': True, 'nentries': 10}}) + self.assertEqual(rawFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data': {'nentries': 10}, '/afs/cern.ch/atlas/offline/test/data11_7TeV.00191920.physics_JetTauEtmiss.merge.RAW._lb0257._SFO-9._0001.1.10evts': {'nentries': 10}, '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.RAW._lb0300._SFO-10._0001.1.10evts.data': {'nentries': 10}}) + self.assertEqual(rawFile.getMetadata(metadataKeys = ('nentries',), files = '/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data'), {'/afs/cern.ch/atlas/offline/test/data11_cos.00182609.physics_CosmicCalo.merge.RAW._lb0100._SFO-ALL._0001.1.SFO-ALL._0001.1.10evts.data': {'nentries': 10}}) + + self.assertEqual(rawFile.prodsysDescription['type'],'file') + except OSError: + # With python 2.7 this should call the self.skip() method + print >>sys.stderr, 'WARNING Skipping test_argAthenaMultiFileMetadata - stat on AFS test file failed' + + +if 
__name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfArgs.py b/Tools/PyJobTransforms/test/test_trfArgs.py new file mode 100755 index 0000000000000000000000000000000000000000..af8e9c0e85e2909247f6d50eac361403de562323 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfArgs.py @@ -0,0 +1,106 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfArgs.py +# @brief Unittests for trfArgs.py +# @author maddocks.harvey@gmail.com, graeme.andrew.stewart@cern.ch +# @version $Id: test_trfArgs.py 534178 2013-01-21 19:04:08Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfArgs import * +from PyJobTransforms.trfArgClasses import trfArgParser + +## Unittests for trfArgs module +class trfArgsUnitTests(unittest.TestCase): + + def test_argsok(self): + myParser = trfArgParser(description='Test parser for argsok, %s' % __name__) + addTeaArguments(myParser) + args = ['--cupsOfTea', '3'] + self.assertTrue(isinstance(myParser.parse_args(args), argparse.Namespace)) + + # Stop this test from being noisy - argparse spits out to stderr + # and here we expect a failure + @silent + def test_argsbad(self): + myParser = trfArgParser(description='Test parser for argsbad, %s' % __name__) + addStandardTrfArgs(myParser) + args = ['--verbose', '--outputFile', 'someFile', '--runNumber', '1234', + '--unknownoption', 'what'] + self.assertRaises(SystemExit, myParser.parse_args, args) + + @silent + def test_help(self): + myParser = trfArgParser(description='Test parser for trf help, %s' % __name__) + addStandardTrfArgs(myParser) + args = ['--help'] + self.assertRaises(SystemExit, myParser.parse_args, args) + + def test_subStep(self): + myParser = trfArgParser() + addStandardTrfArgs(myParser) + addAthenaArguments(myParser) + args = ['--preExec', 'r2e:stuff', 'e2e:something', 'r2e:somemorestuff', 'e2e:somethingElse'] + myArgDict = vars(myParser.parse_args(args)) + properArgDict = {'r2e': ['stuff', 'somemorestuff'], 'e2e': ['something', 'somethingElse']} + self.assertTrue(isinstance(myArgDict, dict)) + self.assertEquals(myArgDict['preExec']._value, properArgDict) + + def test_Pickle(self): + myParser = trfArgParser(description='test parser for pickled arguments, %s' % __name__) + addStandardTrfArgs(myParser) + addAthenaArguments(myParser) + import pickle + pickleArgs = {'cupsOfTea' : '3', 'mugVolume' : '314.56', 'teaType' : 'earl grey', 'drinkers': 'graeme,bjorn,mark'} + pickle.dump(pickleArgs, open("testorama", "wb")) + from PyJobTransforms.transform import transform + tf = transform() + addTeaArguments(tf.parser) + self.assertEquals(tf.parseCmdLineArgs(['--argdict', 'testorama']), None) + try: + os.unlink('testorama') + except OSError: + pass + + +class trfIntArgsUnitTests(unittest.TestCase): + + @silent + def test_EchoInt(self): + from PyJobTransforms.transform import transform + tf = transform() + addTeaArguments(tf.parser) + self.assertEquals(tf.parseCmdLineArgs(['--cupsOfTea', '123']), None) + + @silent + def test_EchoIntFail(self): + from PyJobTransforms.transform import transform + tf = transform() + addTeaArguments(tf.parser) + self.assertRaises(SystemExit, tf.parseCmdLineArgs, ['--cupsOfTea', 'HELLO']) + +class trfFloatArgsUnitTests(unittest.TestCase): + + @silent + def test_EchoFloat(self): + from PyJobTransforms.transform import transform + tf = transform() + 
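# addTeaArguments supplies the toy options exercised in these tests + # (cupsOfTea parses as an int, mugVolume as a float) +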
addTeaArguments(tf.parser) + self.assertEquals(tf.parseCmdLineArgs(['--mugVolume', '1.23']), None) + + @silent + def test_EchoFloatFail(self): + from PyJobTransforms.transform import transform + tf = transform() + addTeaArguments(tf.parser) + self.assertRaises(SystemExit, tf.parseCmdLineArgs, ['--mugVolume', 'LOL']) + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfDecorators.py b/Tools/PyJobTransforms/test/test_trfDecorators.py new file mode 100755 index 0000000000000000000000000000000000000000..5934d847c891112ef299c4730423197bf3fe7e9f --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfDecorators.py @@ -0,0 +1,24 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfDecorators.py +# @brief Unittests for trfDecorators.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfDecorators.py 529035 2012-12-05 15:45:24Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfDecorators import * + +## Unittests for this module +class trfDecoratorTests(unittest.TestCase): + pass + +if __name__ == '__main__': + unittest.main() + diff --git a/Tools/PyJobTransforms/test/test_trfEnv.py b/Tools/PyJobTransforms/test/test_trfEnv.py new file mode 100755 index 0000000000000000000000000000000000000000..c9adf5050e22de6fd6879c231b2dae05f98b70a0 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfEnv.py @@ -0,0 +1,57 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfEnv.py +# @brief Unittests for trfEnv.py +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: test_trfEnv.py 588222 2014-03-18 14:37:06Z graemes $ + +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +msg.info('logging set in %s' % sys.argv[0]) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfEnv import * + +# Unittests for this module +class trfEnvTests(unittest.TestCase): + + def test_trfEnvinit(self): + # Test we can get an environmentUpdate object + envUp = environmentUpdate() + self.assertTrue(isinstance(envUp, environmentUpdate)) + pVar = pathVar('PATH') + self.assertTrue(isinstance(pVar, pathVar)) + + def test_pathAdd(self): + pVar = pathVar('PATH') + pVar.add('/tmp') + self.assertEqual(pVar._value[0], '/tmp') + self.assertEqual(pVar._value[1], pVar._currentEnvironmentValue) + pVar.add('/somepaththatdoesnotexist') + self.assertNotEqual(pVar._value[0], '/somepaththatdoesnotexist') + + def test_pathStrValue(self): + pVar = pathVar('foo', testForExistance=False) + pVar.add('bar') + pVar.add('baz') + self.assertEqual(str(pVar), "baz:bar") + + def testEnvUp(self): + from PyJobTransforms.trfArgClasses import argSubstepList, argSubstepBool + envUp = environmentUpdate() + argDict = {'env': argSubstepList(["KEY1=VALUE1","KEY2=VALUE2","KEY3=VALUE3"]), + 'imf': argSubstepBool('True')} + envUp.setStandardEnvironment(argDict) + print envUp.values + print envUp._envdict + self.assertTrue("KEY1" in envUp._envdict) + self.assertTrue("LD_PRELOAD" in envUp._envdict) + + +if __name__ == '__main__': + unittest.main() + diff --git a/Tools/PyJobTransforms/test/test_trfExceptions.py b/Tools/PyJobTransforms/test/test_trfExceptions.py new file mode 100755 index 0000000000000000000000000000000000000000..54a2b9ba757bf87c09e4e343094c6ef878d094de 
--- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfExceptions.py @@ -0,0 +1,45 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfExceptions.py +# @brief Unittests for trfExceptions.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfExceptions.py 570543 2013-11-14 21:52:16Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfExceptions import * + + + +## Unittests for this module +class trfExceptionTests(unittest.TestCase): + + def test_baseException(self): + e = TransformException(1, 'A simple exception message') + self.assertTrue(isinstance(e, TransformException)) + + def test_inheritedException(self): + e = TransformLogfileErrorException(1, 'A simple exception message') + self.assertTrue(isinstance(e, TransformLogfileErrorException)) + self.assertTrue(isinstance(e, TransformException)) + + def test_getters(self): + e = TransformLogfileErrorException(1, 'A simple exception message') + self.assertEqual(e.errCode, 1) + self.assertEqual(e.errMsg, 'A simple exception message') + + def test_errMsgSetter(self): + e = TransformLogfileErrorException(1, 'A simple exception message') + e.errMsg = 'A new message' + self.assertEqual(e.errMsg, 'A new message') + + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file diff --git a/Tools/PyJobTransforms/test/test_trfExe.py b/Tools/PyJobTransforms/test/test_trfExe.py new file mode 100755 index 0000000000000000000000000000000000000000..f74b4fd16445bf23b40498c9be8bcbac7cad6358 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfExe.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfExe.py +# @brief Unittests for trfExe.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfExe.py 542755 2013-03-27 15:11:16Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfExe import * + +import PyJobTransforms.trfExceptions as trfExceptions + +## Unittests for this module +class trfExeTests(unittest.TestCase): + + def test_basicExecutor(self): + exe = transformExecutor() + self.assertTrue(isinstance(exe, transformExecutor)) + + def test_badExecutor(self): + # Executors are not allowed to consume and produce the same data type + self.assertRaises(trfExceptions.TransformSetupException, transformExecutor, inData=['RAW'], outData=['RAW']) + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file diff --git a/Tools/PyJobTransforms/test/test_trfExitCodes.py b/Tools/PyJobTransforms/test/test_trfExitCodes.py new file mode 100755 index 0000000000000000000000000000000000000000..3fda65ce0ac4e8bc80e0d159cd0d783ab1eb1e16 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfExitCodes.py @@ -0,0 +1,36 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfExitCodes.py +# @brief Unittests for trfExitCodes.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfExitCodes.py 542702 2013-03-27 10:49:10Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfExitCodes import * + + +## Unittests for this module +class trfExitTests(unittest.TestCase): + + def test_exitTestsNameToCode(self): + self.assertEqual(trfExit.nameToCode('OK'), 0) + self.assertEqual(trfExit.nameToCode('TRF_SETUP'), 3) + + def test_exitTestsCodeToName(self): + self.assertEqual(trfExit.codeToName(0), 'OK') + self.assertEqual(trfExit.codeToName(3), 'TRF_SETUP') + + def test_exitTestsNameToDesc(self): + self.assertEqual(trfExit.nameToDesc('OK'), 'Successful exit') + self.assertEqual(trfExit.nameToDesc('TRF_SETUP'), 'Transform setup error') + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file diff --git a/Tools/PyJobTransforms/test/test_trfFileUtils.py b/Tools/PyJobTransforms/test/test_trfFileUtils.py new file mode 100755 index 0000000000000000000000000000000000000000..268f6bc291412cf27c2e15e3f49fb509f9026997 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfFileUtils.py @@ -0,0 +1,22 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfFileUtils.py +# @brief Unittests for trfFileUtils.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfFileUtils.py 529035 2012-12-05 15:45:24Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfFileUtils import * + + +## Unittests for this module +# Write me! +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfGraph.py b/Tools/PyJobTransforms/test/test_trfGraph.py new file mode 100755 index 0000000000000000000000000000000000000000..321b71fc4748e5e6d67709e0563530b5152d9f44 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfGraph.py @@ -0,0 +1,48 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfGraph.py +# @brief Unittests for trfGraph.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfGraph.py 542755 2013-03-27 15:11:16Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfGraph import * + +import PyJobTransforms.trfExceptions as trfExceptions + +## Unit tests +class graphNodeTests(unittest.TestCase): + + def setUp(self): + self.gn1 = graphNode('testNode1', ['ESD'], ['AOD']) + self.gn2 = graphNode('testNode2', ['ESD', 'ESD1'], ['AOD', 'HIST_AOD']) + self.gn3 = graphNode('testNode3', [('HIST_AOD', 'HIST_ESD')], ['HIST']) + self.gn4 = graphNode('testNode4', ['BS', 'RDO'], ['ESD']) + + def test_graphNodeBasicNodeProperties(self): + self.assertEqual(self.gn1.name, 'testNode1') + self.assertEqual(self.gn1.inData, set(['ESD'])) + self.assertEqual(self.gn1.outData, set(['AOD'])) + self.assertEqual(self.gn1.connections, {'in':{}, 'out':{}}) + self.assertEqual(self.gn2.name, 'testNode2') + self.assertEqual(self.gn2.inData, set(['ESD', 'ESD1'])) + self.assertEqual(self.gn2.outData, set(['AOD', 'HIST_AOD'])) + self.assertEqual(self.gn2.outputDataTypes, set(['HIST_AOD', 'AOD'])) + self.assertEqual(self.gn3.inputDataTypes, set(['HIST_AOD', 'HIST_ESD'])) + + def test_graphNodeConnections(self): + pass + + def test_badDataFlow(self): + self.assertRaises(trfExceptions.TransformSetupException, executorGraph, executorSet=set(), inputData = ['RAW'], outputData = ['RAW']) + + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfJobOptions.py b/Tools/PyJobTransforms/test/test_trfJobOptions.py new file mode 100755 index 0000000000000000000000000000000000000000..39ba519387e74d03ebdda780ab70c6f78cfd49f4 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfJobOptions.py @@ -0,0 +1,35 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfJobOptions.py +# @brief Unittests for trfJobOptions.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfJobOptions.py 553586 2013-07-04 14:41:56Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfJobOptions import * + +## Unit tests +class jobOptionsTests(unittest.TestCase): + + def test_runArguments(self): + ra = RunArguments() + ra.foo = 'bar' + ra.baz = ['some', 'list'] + self.assertTrue('foo' in dir(ra) and 'baz' in dir(ra)) + self.assertEqual(ra.foo, 'bar') + self.assertEqual(ra.baz, ['some', 'list']) + self.assertFalse(hasattr(ra, 'nothingToSee')) + + # It would be good to have a test of actually writing runargs, + # but note this really requires a whole transform+executor + +## Unittests for this module +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfLogger.py b/Tools/PyJobTransforms/test/test_trfLogger.py new file mode 100755 index 0000000000000000000000000000000000000000..b6bab92f25395be29ac63dce568568654f2e841e --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfLogger.py @@ -0,0 +1,22 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfLogger.py +# @brief Unittests for trfLogger.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfLogger.py 529035 2012-12-05 15:45:24Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfLogger import * + + +## Unittests for this module +# Write me! +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfReports.py b/Tools/PyJobTransforms/test/test_trfReports.py new file mode 100755 index 0000000000000000000000000000000000000000..918b9171bfc53a6172b68b712b191053aeda4d72 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfReports.py @@ -0,0 +1,343 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfReports.py +# @brief Unittests for trfReports.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfReports.py 610120 2014-08-04 13:34:16Z volkmer $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfReports import * + +## Unittests for this module +class trfJobReportUnitTests(unittest.TestCase): + + def setUp(self): + self.report = trfJobReport(parentTrf = None) + self.__compressionInput = {'input': [{'dataset': None, + 'nentries': 1541, + 'subFiles': [{'file_guid': '1CE618EF-AF8A-E111-A3C0-003048D37440', + 'name': 'data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1'}], + 'type': 'BS'}], + 'output': [{'argName': 'outputAODFile', + 'dataset': None, + 'subFiles': [{'AODFixVersion': '', + 'beam_energy': [4000000.0], + 'beam_type': ['collisions'], + 'conditions_tag': 'COMCOND-BLKPA-006-01', + 'file_guid': 'C9D861B6-9180-2642-9914-879FC1EA44DF', + 'file_size': 3461628L, + 'file_type': 'pool', + 'geometry': 'ATLAS-GEO-20-00-01', + 'integrity': True, + 'lumi_block': [423L], + 'name': 'my.AOD.pool.root', + 'nentries': 10L, + 'run_number': [201556L]}], + 'type': 'AOD'}, + {'argName': 'outputESDFile', + 'dataset': None, + 'subFiles': [{'AODFixVersion': '', + 'beam_energy': [4000000.0], + 'beam_type': ['collisions'], + 'conditions_tag': 'COMCOND-BLKPA-006-01', + 'file_guid': '4CD89E10-42DD-E34C-BF1F-545CEE19E4FD', + 'file_size': 26391289L, + 'file_type': 'pool', + 'geometry': 'ATLAS-GEO-20-00-01', + 'integrity': True, + 'lumi_block': [423L], + 'name': 'my.ESD.pool.root', + 'nentries': 10L, + 'run_number': [201556L]}], + 'type': 'ESD'}]} + self.__compressionOutput = {'input': [{'dataset': None, + 'nentries': 1541, + 'subFiles': [{'file_guid': '1CE618EF-AF8A-E111-A3C0-003048D37440', + 'name': 'data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1'}], + 'type': 'BS'}], + 'output': {'common': {'AODFixVersion': '', + 'beam_energy': [4000000.0], + 'beam_type': ['collisions'], + 'conditions_tag': 'COMCOND-BLKPA-006-01', + 'dataset': None, + 'file_type': 'pool', + 'geometry': 'ATLAS-GEO-20-00-01', + 'integrity': True, + 'lumi_block': [423L], + 'nentries': 10L, + 'run_number': [201556L]}, + 'files': [{'argName': 'outputAODFile', + 'subFiles': [{'file_guid': 'C9D861B6-9180-2642-9914-879FC1EA44DF', + 'file_size': 3461628L, + 'name': 'my.AOD.pool.root'}], + 'type': 'AOD'}, + {'argName': 'outputESDFile', + 'subFiles': [{'file_guid': 
'4CD89E10-42DD-E34C-BF1F-545CEE19E4FD', + 'file_size': 26391289L, + 'name': 'my.ESD.pool.root'}], + 'type': 'ESD'}]}} + + def tearDown(self): + pass + + def test_compressedFileReport(self): + compressed = self.report._compressFileDict(self.__compressionInput) + self.assertEqual(compressed, self.__compressionOutput) + + + def test_compressedFileReportOneMissing(self): + del self.__compressionInput['output'][0]['subFiles'][0]['nentries'] + del self.__compressionOutput['output']['common']['nentries'] + self.__compressionOutput['output']['files'][1]['subFiles'][0]['nentries'] = 10L + + compressed = self.report._compressFileDict(self.__compressionInput) + self.assertEqual(compressed, self.__compressionOutput) + + def test_compressedFileReportTypeDiff(self): + self.__compressionInput['output'][0]['subFiles'][0]['nentries'] = 10.0 + del self.__compressionOutput['output']['common']['nentries'] + self.__compressionOutput['output']['files'][0]['subFiles'][0]['nentries'] = 10.0 + self.__compressionOutput['output']['files'][1]['subFiles'][0]['nentries'] = 10L + + compressed = self.report._compressFileDict(self.__compressionInput) + self.maxDiff = None + self.assertEqual(compressed, self.__compressionOutput) + + def test_compressedFileReportNoCompression(self): + del self.__compressionInput['output'][0]['subFiles'][0]['AODFixVersion'] + del self.__compressionInput['output'][0]['subFiles'][0]['beam_energy'] + del self.__compressionInput['output'][0]['subFiles'][0]['beam_type'] + del self.__compressionInput['output'][0]['subFiles'][0]['conditions_tag'] + del self.__compressionInput['output'][0]['subFiles'][0]['file_type'] + del self.__compressionInput['output'][0]['subFiles'][0]['integrity'] + del self.__compressionInput['output'][0]['subFiles'][0]['lumi_block'] + del self.__compressionInput['output'][0]['subFiles'][0]['run_number'] + del self.__compressionInput['output'][0]['subFiles'][0]['geometry'] + del self.__compressionInput['output'][0]['subFiles'][0]['nentries'] + del self.__compressionInput['output'][0]['dataset'] + + compressed = self.report._compressFileDict(self.__compressionInput) + self.assertEqual(compressed, self.__compressionInput) + + + def test_compressedFileReportSubFileCompression(self): + subFilesCompressed = {'argName': 'inputTEST_MULTIFileCompress', + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'file_size': 20, + 'name': 'file1', + 'niceness': 1234567}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'file_size': 13, + 'name': 'file2', + 'niceness': 1234567}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'file_size': 174, + 'name': 'file3'}], + 'type': None} + self.__compressionInput['input'].append(subFilesCompressed) + + compressed = self.report._compressFileDict(self.__compressionInput) + self.assertEqual(compressed['output'], self.__compressionOutput['output']) + self.assertEqual(compressed['input'], self.__compressionInput['input']) + + def test_compressedFileReportSubFileCompression2(self): + subFilesCompressedInput = {'argName': 'inputTEST_MULTIFileCompress', + 'common': {'generator': 'Pythia', + 'integrity': True + }, + 'dataset': None, + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'file_size': 20, + 'name': 'file1', + 'niceness': 1234567}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'file_size': 13, + 'name': 'file2', + 'niceness': 1234567}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'file_size': 174, + 'niceness': 1234567, + 'name': 'file3'}], + 'type': None} + 
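# Expected compression below: keys common to every file block ('generator', + # 'dataset') are hoisted into the top-level 'common' dict, while 'integrity', + # present only in this argument's own block, stays nested at the file level +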
subFilesCompressedInputCompressed = {'common': {'dataset': None, + 'generator': 'Pythia' + }, + 'files': [{'nentries': 1541, + 'subFiles': [{'file_guid': '1CE618EF-AF8A-E111-A3C0-003048D37440', + 'name': 'data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1'}], + 'type': 'BS'}, + {'argName': 'inputTEST_MULTIFileCompress', + 'common': {'integrity': True}, + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'file_size': 20, + 'name': 'file1', + 'niceness': 1234567}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'file_size': 13, + 'name': 'file2', + 'niceness': 1234567}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'file_size': 174, + 'name': 'file3', + 'niceness': 1234567}], + 'type': None}]} + + self.__compressionInput['input'][0]['generator'] = 'Pythia' + self.__compressionInput['input'].append(subFilesCompressedInput) + self.__compressionOutput['input'] = subFilesCompressedInputCompressed + + compressed = self.report._compressFileDict(self.__compressionInput) + self.assertEqual(compressed, self.__compressionOutput) + + +## Unittests for this module +class trfFileReportUnitTests(unittest.TestCase): + + def setUp(self): + # In python 2.7 support for multiple 'with' expressions becomes available + with open('file1', 'w') as f1: + print >> f1, 'This is test file 1' + with open('file2', 'w') as f2: + print >> f2, 'Short file 2' + with open('file3', 'w') as f3: + print >> f3, 80 * '-', 'Long file 3', 80 * '-' + + # For test reports, use manually set GUIDs + self.mySingleFile = trfArgClasses.argFile(['file1'], name = 'inputTEST_SINGLEFile', io = 'input', guid = {'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B'}) + self.mySingleFileReport = trfFileReport(self.mySingleFile) + + self.myMultiFile = trfArgClasses.argFile(['file1', 'file2', 'file3'], name = 'inputTEST_MULTIFile', io = 'input', guid = {'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'file2': '1368D295-27C6-4A92-8187-704C2A6A5864', 'file3': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9'}) + self.myMultiFileReport = trfFileReport(self.myMultiFile) + + self.myMultiFileCompress1 = trfArgClasses.argFile(['file1', 'file2', 'file3'], name = 'inputTEST_MULTIFileCompress', io = 'input', guid = {'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'file2': '1368D295-27C6-4A92-8187-704C2A6A5864', 'file3': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9'}) + self.myMultiFileCompress1._setMetadata(files = None, metadataKeys = {'niceness': 1234567, 'generator': 'Pythia'}) + self.myMultiFileCompressReport1 = trfFileReport(self.myMultiFileCompress1) + + self.myMultiFileCompress2 = trfArgClasses.argFile(['file1', 'file2', 'file3'], name = 'inputTEST_MULTIFileCompress', io = 'input', guid = {'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'file2': '1368D295-27C6-4A92-8187-704C2A6A5864', 'file3': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9'}) + self.myMultiFileCompress2._setMetadata(files = ['file1', 'file2'], metadataKeys = {'niceness': 1234567, 'generator': 'Pythia'}) + self.myMultiFileCompress2._setMetadata(files = ['file3'], metadataKeys = {'generator': 'Pythia'}) + self.myMultiFileCompressReport2 = trfFileReport(self.myMultiFileCompress2) + + def tearDown(self): + for f in 'file1', 'file2', 'file3': + try: + os.unlink(f) + except OSError: + pass + + def test_fileReportPython(self): + self.assertEqual(self.mySingleFileReport.singleFilePython(filename = 'file1'), {'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'integrity': True, 'name': 'file1', 'file_size': 20}) + 
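# python() defaults to a full dump of the cached metadata; type='name' + # strips the report to names and GUIDs, plus the dataset and an + # 'UNDEFINED' nentries placeholder +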
self.assertEqual(self.mySingleFileReport.python(), {'argName': 'inputTEST_SINGLEFile', 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'integrity': True, 'name': 'file1', 'file_size': 20}], 'type': None, 'dataset': None}) + self.assertEqual(self.mySingleFileReport.python(type = 'name'), {'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'name': 'file1'}], 'nentries': 'UNDEFINED', 'dataset': None}) + + def test_fileReportPythonUnknown(self): + self.assertRaises(trfExceptions.TransformReportException, self.mySingleFileReport.singleFilePython, filename = 'xxx') + + def test_multiFileReportPython(self): + self.assertEqual(self.myMultiFileReport.singleFilePython(filename = 'file1'), {'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'integrity': True, 'name': 'file1', 'file_size': 20}) + self.assertEqual(self.myMultiFileReport.singleFilePython(filename = 'file2'), {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', 'integrity': True, 'name': 'file2', 'file_size': 13}) + self.assertEqual(self.myMultiFileReport.singleFilePython(filename = 'file3'), {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'integrity': True, 'name': 'file3', 'file_size': 174}) + self.assertEqual(self.myMultiFileReport.python(type = 'full'), {'argName': 'inputTEST_MULTIFile', 'common' : {'integrity' : True}, 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'name': 'file1', 'file_size': 20}, {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', 'name': 'file2', 'file_size': 13}, {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'name': 'file3', 'file_size': 174}], 'type': None, 'dataset': None}) + self.assertEqual(self.myMultiFileReport.python(type = 'name'), {'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'name': 'file1'}, {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', 'name': 'file2'}, {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'name': 'file3'}], 'nentries': 'UNDEFINED', 'dataset': None}) + + + def test_CommonBlockCleanUp(self): + compressedResult1 = {'argName': 'inputTEST_MULTIFileCompress', + 'common': {'generator': 'Pythia', 'integrity': True, 'niceness': 1234567}, + 'dataset': None, + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'file_size': 20, + 'name': 'file1'}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'file_size': 13, + 'name': 'file2'}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'file_size': 174, + 'name': 'file3'}], + 'type': None} + + compressedResult2 = {'argName': 'inputTEST_MULTIFileCompress', + 'common': {'generator': 'Pythia', 'integrity': True}, + 'dataset': None, + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'file_size': 20, + 'name': 'file1', + 'niceness': 1234567}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'file_size': 13, + 'name': 'file2', + 'niceness': 1234567}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'file_size': 174, + 'name': 'file3'}], + 'type': None} + + metaDataIgnored = {'dataset': None, + 'nentries': 'UNDEFINED', + 'subFiles': [{'file_guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', + 'name': 'file1'}, + {'file_guid': '1368D295-27C6-4A92-8187-704C2A6A5864', + 'name': 'file2'}, + {'file_guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', + 'name': 'file3'}]} + + self.assertEqual(self.myMultiFileCompressReport1.python(type = 'full'), compressedResult1) + self.assertEqual(self.myMultiFileCompressReport2.python(type = 'full'), compressedResult2) + 
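# With type = 'name' the per-file metadata (including any 'common'
+        # block) is ignored entirely, so both compressed reports collapse to
+        # the same dict:
+        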
self.assertEqual(self.myMultiFileCompressReport1.python(type = 'name'), metaDataIgnored)
+        self.assertEqual(self.myMultiFileCompressReport2.python(type = 'name'), metaDataIgnored)
+
+
+class machineReportUnitTests(unittest.TestCase):
+    # The following tests don't check report content, just that the objects are generated correctly
+    def test_machineReportPython(self):
+        self.assertTrue(isinstance(machineReport().python(), dict))
+
+
+class trfExeReportUnitTests(unittest.TestCase):
+    def test_exeReportPython(self):
+        try:
+            from PyJobTransforms.trfExe import transformExecutor
+        except ImportError:
+            from trfExe import transformExecutor
+        exeReport = trfExecutorReport(transformExecutor(name = 'unittest dummy'))
+        self.assertTrue(isinstance(exeReport.python(), dict))
+
+
+class trfReportWriterUnitTests(unittest.TestCase):
+    def tearDown(self):
+        try:
+            os.unlink('testReport')
+        except OSError:
+            pass
+
+    # These tests are minimal - just check the methods don't raise exceptions
+    def test_textReportGenerator(self):
+        self.assertIsNone(trfReport().writeTxtReport(filename = 'testReport'))
+
+    def test_GPickleReportGenerator(self):
+        self.assertIsNone(trfReport().writeGPickleReport(filename = 'testReport'))
+
+    def test_ClassicXMLReportGenerator(self):
+        self.assertIsNone(trfReport().writeClassicXMLReport(filename = 'testReport'))
+
+    def test_JSONReportGenerator(self):
+        self.assertIsNone(trfReport().writeJSONReport(filename = 'testReport'))
+
+
+class trfClassicXMLReportTest(unittest.TestCase):
+
+    def setUp(self):
+        # Initialise a fake transform
+        pass
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Tools/PyJobTransforms/test/test_trfSignal.py b/Tools/PyJobTransforms/test/test_trfSignal.py
new file mode 100755
index 0000000000000000000000000000000000000000..f780c4107348644f0c398fb69139977b62d2a211
--- /dev/null
+++ b/Tools/PyJobTransforms/test/test_trfSignal.py
@@ -0,0 +1,22 @@
+#! /usr/bin/env python
+
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @Package test_trfSignal.py
+# @brief Unittests for trfSignal.py
+# @author graeme.andrew.stewart@cern.ch
+# @version $Id: test_trfSignal.py 529035 2012-12-05 15:45:24Z graemes $
+
+import unittest
+
+import logging
+msg = logging.getLogger(__name__)
+
+# Allowable to import * from the package for which we are the test suite
+from PyJobTransforms.trfSignal import *
+
+
+## Unittests for this module
+# Write me!
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Tools/PyJobTransforms/test/test_trfUtils.py b/Tools/PyJobTransforms/test/test_trfUtils.py
new file mode 100755
index 0000000000000000000000000000000000000000..b6f8b30cadc0283185951f47a65911a40bd0e733
--- /dev/null
+++ b/Tools/PyJobTransforms/test/test_trfUtils.py
@@ -0,0 +1,92 @@
+#! /usr/bin/env python
+
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @Package test_trfUtils.py
+# @brief Unittests for trfUtils.py
+# @author graeme.andrew.stewart@cern.ch
+# @version $Id: test_trfUtils.py 594679 2014-04-29 14:15:19Z graemes $
+
+import unittest
+import os
+import pwd
+import subprocess
+import time
+
+import logging
+msg = logging.getLogger(__name__)
+
+# Allowable to import * from the package for which we are the test suite
+from PyJobTransforms.trfUtils import *
+
+from PyJobTransforms.trfDecorators import timelimited
+
+## Unittests for this module
+
+class trfUtilsInfanticide(unittest.TestCase):
+    def setUp(self):
+        self.waitWrapper = "wait.sh"
+        self.exitWrapper = "exit.sh"
+
+        waitFile = open(self.waitWrapper, "w")
+        print >>waitFile, "#!/bin/sh"
+        print >>waitFile, "sleep 100"
+        waitFile.close()
+        os.chmod(self.waitWrapper, 00755)
+
+        exitFile = open(self.exitWrapper, "w")
+        print >>exitFile, "#!/bin/sh"
+        print >>exitFile, "sleep 100 &"
+        exitFile.close()
+        os.chmod(self.exitWrapper, 00755)
+
+
+    def tearDown(self):
+        for f in (self.waitWrapper, self.exitWrapper):
+            os.unlink(f)
+
+    def test_basicKiller(self):
+        p = subprocess.Popen(['sleep', '100'])
+# print subprocess.check_output(['pstree', '-p', pwd.getpwuid(os.getuid())[0]])
+# print subprocess.check_output(['ps', 'ax', '-o', 'pid,ppid,pgid,args', '-m'])
+        myWeans = listChildren(listOrphans = False)
+        self.assertEqual(len(myWeans), 1)
+        self.assertEqual(myWeans[0], p.pid)
+        infanticide(myWeans)
+        p.wait()
+
+    def test_multiKiller(self):
+        p = subprocess.Popen(["./{0}".format(self.waitWrapper)])
+        time.sleep(10) # Better let children get started!
+# print subprocess.check_output(['pstree', '-p', pwd.getpwuid(os.getuid())[0]])
+        myWeans = listChildren(listOrphans = False)
+# print myWeans
+        self.assertEqual(len(myWeans), 2)
+        self.assertEqual(myWeans[1], p.pid)
+        infanticide(myWeans)
+        p.wait()
+        myWeans = listChildren(listOrphans = False)
+# print myWeans
+# print subprocess.check_output(['ps', 'guxw'])
+        self.assertEqual(len(myWeans), 0)
+
+    # This is just too hard and too dangerous to test
+#    def test_orphanKiller(self):
+#        p = subprocess.Popen(["./{0}".format(self.exitWrapper)])
+#        time.sleep(1)
+#        # print subprocess.check_output(['ps', 'ax', '-o', 'pid,ppid,pgid,args', '-m'])
+#        p.poll()
+#        myWeans = listChildren(listOrphans = True)
+#        self.assertGreaterEqual(len(myWeans), 1)
+#        infanticide(myWeans)
+#        p.wait() # This is important to clean up zombies
+#        myWeans = listChildren(listOrphans = True)
+#        # print subprocess.check_output(['ps', 'guxw'])
+#        self.assertGreaterEqual(len(myWeans), 0)
+
+# @timelimited(timeout=10, sleeptime=1)
+#    def test_timelimitedKiller(self):
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Tools/PyJobTransforms/test/test_trfUtilsDBRelease.py b/Tools/PyJobTransforms/test/test_trfUtilsDBRelease.py
new file mode 100755
index 0000000000000000000000000000000000000000..243b7d68cfbe731705b18f6a2ebcf87f325f8ccd
--- /dev/null
+++ b/Tools/PyJobTransforms/test/test_trfUtilsDBRelease.py
@@ -0,0 +1,114 @@
+#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration +# +# Test the various DBRelease scenarios +# $Id: test_trfUtilsDBRelease.py 580319 2014-01-27 16:14:20Z graemes $ +# + +import json +import subprocess +import os +import os.path +import shutil +import sys +import unittest + +from PyJobTransforms.trfExitCodes import trfExit as trfExit + +from PyJobTransforms.trfLogger import msg + +class DBReleasetest(unittest.TestCase): + + # Standard setup using CVMFS + def test_cvmfsStandard(self): + cmd = ['Athena_tf.py', '--DBRelease', '23.3.1'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Setup using CVMFS 'current' + def test_cvmfsCurrent(self): + cmd = ['Athena_tf.py', '--DBRelease', 'current'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Test using a DBRelease file which exists + def test_tarball(self): + try: + os.symlink('/afs/cern.ch/work/g/graemes/ddm/ddo.000001.Atlas.Ideal.DBRelease.v220701/DBRelease-22.7.1.tar.gz', 'DBRelease-22.7.1.tar.gz') + except (IOError, OSError) as (errno, errMsg): + # Ignore file exists - if that happens the link was already there + if errno == 17: + pass + else: + raise + cmd = ['Athena_tf.py', '--DBRelease', 'DBRelease-22.7.1.tar.gz'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Test using a DBRelease file which exists, absolute path (this should _not_ rerun the setup script, of course) + def test_tarballAbsPath(self): + cmd = ['Athena_tf.py', '--DBRelease', '/afs/cern.ch/work/g/graemes/ddm/ddo.000001.Atlas.Ideal.DBRelease.v220701/DBRelease-22.7.1.tar.gz'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Test using a DBRelease file which doesn't exist, but should fallback to CVMFS + def test_tarballFallback(self): + cmd = ['Athena_tf.py', '--DBRelease', 'DBRelease-23.3.1.tar.gz'] + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Negative test - use an illegal name format + def 
test_illegalName(self):
+        cmd = ['Athena_tf.py', '--DBRelease', 'FailMeHarder']
+        msg.info('Will run this transform: {0}'.format(cmd))
+        p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1)
+        while p.poll() is None:
+            line = p.stdout.readline()
+            sys.stdout.write(line)
+        # Hoover up remaining buffered output lines
+        for line in p.stdout:
+            sys.stdout.write(line)
+        self.assertEqual(p.returncode, trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'))
+
+def tearDownModule():
+    if os.path.isdir('DBRelease'):
+        shutil.rmtree('DBRelease')
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessor.py b/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessor.py
new file mode 100755
index 0000000000000000000000000000000000000000..add44238c29485c3219fb970880e9c1dde18f225
--- /dev/null
+++ b/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessor.py
@@ -0,0 +1,169 @@
+#! /usr/bin/env python
+
+# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
+
+## @Package test_trfUtilsParallelJobProcessor.py
+# @brief Unittests for trfUtils.py ParallelJobProcessor
+# @author william.dmitri.breaden.madden@cern.ch
+
+import inspect
+import fnmatch
+import os
+import re
+import sys
+import time
+import unittest
+import logging
+
+from PyUtils import RootUtils
+from PyJobTransforms.trfExitCodes import trfExit
+from PyJobTransforms.trfArgClasses import argFile
+import PyJobTransforms.trfExceptions as trfExceptions
+
+#import PyJobTransforms.trfUtils as trfUtils
+from PyJobTransforms.trfUtils import *
+
+from PyJobTransforms.trfLogger import msg
+from PyJobTransforms.trfLogger import stdLogLevels, stdLogLevelsByCritcality
+
+
+## @brief a simple function that returns a string and sleeps a specified time
+# @param sleepTime sleep time specification
+def helloWorld(
+    sleepTime = 5
+    ):
+    time.sleep(sleepTime)
+    return "hello world"
+
+
+## @brief a simple function that returns the product of a multiplication
+# @param multiplicand1 multiplicand
+# @param multiplicand2 multiplicand
+def multiply(
+    multiplicand1 = 0,
+    multiplicand2 = 0
+    ):
+    return multiplicand1 * multiplicand2
+
+
+## @brief a simple function that returns its arguments
+# @param **kwargs arguments
+def returnArguments(**kwargs):
+    time.sleep(5)
+    return kwargs
+
+
+## @brief a simple function that sleeps a specified time
+# @param sleepTime sleep time specification
+def timeout(
+    sleepTime = 30
+    ):
+    time.sleep(sleepTime)
+    return 0
+
+
+## @brief a simple function that raises a RuntimeError exception
+def exception():
+    raise RuntimeError("FAIL")
+
+
+## @brief unit tests for the parallel job processor
+class TestParallelJobProcessor(unittest.TestCase):
+    ## @brief unit test for working functions
+    # @detail This method is a unit test of the parallel job processor
+    # testing the processing of two simple, working functions.
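+    # The flow exercised below: submit() hands the JobGroup over for
+    # parallel execution and getResults() blocks until every job has
+    # finished, returning the work function results in submission order
+    # (hence the exact list compared against in the assertion).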
+ def test_working(self): + msg.info("\n\n\n\nPARALLEL JOB PROCESSOR WORKING TEST") + jobGroup1 = JobGroup( + name = "working functions test", + jobs = [ + Job( + name = "hello world function", + workFunction = helloWorld, + workFunctionKeywordArguments = { + 'sleepTime': 1, + }, + workFunctionTimeout = 10 + ), + Job( + name = "multiplication function", + workFunction = multiply, + workFunctionKeywordArguments = { + 'multiplicand1': 2, + 'multiplicand2': 3 + }, + workFunctionTimeout = 10 + ) + ] + ) + parallelJobProcessor1 = ParallelJobProcessor() + parallelJobProcessor1.submit(jobSubmission = jobGroup1) + results = parallelJobProcessor1.getResults() + self.assertEquals(results, ['hello world', 6]) + ## @brief unit test for timeout function + # @detail This method is a unit test of the parallel job processor + # testing the processing of a simple function that is used to cause a + # timeout. + def test_timeout(self): + msg.info("\n\n\n\nPARALLEL JOB PROCESSOR TIMEOUT TEST") + jobGroup1 = JobGroup( + name = "timeout test", + jobs=[ + Job( + name = "timeout function", + workFunction = timeout, + workFunctionKeywordArguments = { + 'sleepTime': 30, + }, + workFunctionTimeout = 1 + ) + ] + ) + parallelJobProcessor1 = ParallelJobProcessor() + parallelJobProcessor1.submit(jobSubmission = jobGroup1) + self.assertRaises( + trfExceptions.TransformTimeoutException, + parallelJobProcessor1.getResults + ) + ## @brief unit test for fail function + # @detail This method is a unit test of the parallel job processor + # testing the processing of a simple function that raises an exception. + def test_fail(self): + msg.info("\n\n\n\nPARALLEL JOB PROCESSOR FAIL TEST") + jobGroup1 = JobGroup( + name = "fail test", + jobs=[ + Job( + name = "fail function", + workFunction = exception, + workFunctionTimeout = 5 + ) + ] + ) + parallelJobProcessor1 = ParallelJobProcessor() + parallelJobProcessor1.submit(jobSubmission = jobGroup1) + self.assertRaises( + trfExceptions.TransformExecutionException, + parallelJobProcessor1.getResults + ) + ## @brief unit test for job information + # @detail This method is a unit test of the job object that raises an + # exception when a work function is not specified. + def test_job_information(self): + msg.info("\n\n\n\nPARALLEL JOB PROCESSOR JOB INFORMATION TEST") + self.assertRaises( + trfExceptions.TransformInternalException, + Job + ) + + +def main(): + msg.level = logging.DEBUG + unittest.main() + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessorData.py b/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessorData.py new file mode 100755 index 0000000000000000000000000000000000000000..cc12c2317ec79db9762af793aea5b3b49a3c27d5 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfUtilsParallelJobProcessorData.py @@ -0,0 +1,238 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_ParallelJobProcessorData.py +# @brief Unittests for output file validation using trfUtils.py +# ParallelJobProcessor +# @author william.dmitri.breaden.madden@cern.ch + +import subprocess +import os +import os.path +import sys +import unittest +import logging + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfLogger import stdLogLevels, stdLogLevelsByCritcality + +# data files for unit tests +fileAOD1 = "/afs/cern.ch/user/w/wbreaden/public/temporary/AOD.01448536._003819.pool.root.1" +fileAOD2 = "/afs/cern.ch/user/w/wbreaden/public/temporary/AOD.01448536._003819.pool.root.1" +fileAOD3 = "/afs/cern.ch/user/w/wbreaden/public/temporary/AOD.01448536._003819.pool_corrupted.root.1" # (corrupted for test purposes) +fileESD1 = "/afs/cern.ch/user/w/wbreaden/public/temporary/my.ESD.pool.root" +fileHIST1 = "/afs/cern.ch/user/w/wbreaden/public/temporary/my.HIST.root" +fileTAG1 = "/afs/cern.ch/user/w/wbreaden/public/temporary/my.TAG.pool.root" +fileBS1 = "/afs/cern.ch/atlas/project/rig/referencefiles/dataStreams_high_mu/data12_8TeV/data12_8TeV.00201556.physics_JetTauEtmiss.merge.RAW._lb0423._SFO-1._0001.1" + + +## @brief unit tests for output data file validation using the parallel job +# processor +class TestOutputFileValidationInParallel(unittest.TestCase): + + ## @brief unit test for AOD + # @detail This method is a unit test for output file validation of AOD data + # using the parallel job processor. + def test_AOD(self): + msg.info("\n\n\n\nAOD OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputAODFile', + fileAOD1, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for ESD + # @detail This method is a unit test for output file validation of ESD data + # using the parallel job processor. + def test_ESD(self): + msg.info("\n\n\n\nESD OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputESDFile', + fileESD1, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for HIST + # @detail This method is a unit test for output file validation of HIST + # data using the parallel job processor. + def test_HIST(self): + msg.info("\n\n\n\nHIST OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputHISTFile', + fileHIST1, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. 
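+        # (p.poll() can go non-None while output is still buffered, so the
+        # loop below drains anything left before the return code is checked)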
+ for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for TAG + # @detail This method is a unit test for output file validation of TAG data + # using the parallel job processor. + def test_TAG(self): + msg.info("\n\n\n\nTAG OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputTAGFile', + fileTAG1, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for BS + # @detail This method is a unit test for output file validation of BS data + # using the parallel job processor. + def test_BS(self): + msg.info("\n\n\n\nBS OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputBSFile', + fileBS1, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for multiple AOD files + # @detail This method is a unit test for output file validation of AOD data + # using the parallel job processor. + def test_AOD_multiple_file__parallel_validations(self): + msg.info("\n\n\n\nAOD MULTIPLE OUTPUT FILE VALIDATIONS USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputAODFile', + fileAOD1, + fileAOD2, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + ## @brief unit test for corrupted AOD + # @detail This method is a unit test for output file validation of AOD data + # using the parallel job processor. + def test_AOD_corrupted(self): + msg.info("\n\n\n\nCORRUPTED AOD OUTPUT FILE VALIDATION USING PARALLEL JOB PROCESSOR") + cmd = [ + 'ValidateFiles_tf.py', + '--outputAODFile', + fileAOD3, + '--parallelFileValidation', + 'True', + '--verbose' + ] + p = subprocess.Popen( + cmd, + shell = False, + stdout = subprocess.PIPE, + stderr = subprocess.STDOUT, + bufsize = 1 + ) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Clean remaining buffered output lines. + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 75) + + +def main(): + msg.level = logging.DEBUG + unittest.main() + + +if __name__ == '__main__': + main() diff --git a/Tools/PyJobTransforms/test/test_trfValidateRootFile.py b/Tools/PyJobTransforms/test/test_trfValidateRootFile.py new file mode 100755 index 0000000000000000000000000000000000000000..229b4bf2a7a62d74813017c1fb15ba7fe82f7ba9 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfValidateRootFile.py @@ -0,0 +1,22 @@ +#! 
/usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfValidateRootFile.py +# @brief Unittests for trfValidateRootFile.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfValidateRootFile.py 529035 2012-12-05 15:45:24Z graemes $ + +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfValidateRootFile import * + + +## Unittests for this module +# Write me! +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfValidation.py b/Tools/PyJobTransforms/test/test_trfValidation.py new file mode 100755 index 0000000000000000000000000000000000000000..dd7e9f66fc5b661fe28a8d7864816a426569c886 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfValidation.py @@ -0,0 +1,238 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfValidation.py +# @brief Unittests for trfValidation.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfValidation.py 614626 2014-09-02 14:22:48Z volkmer $ + +import unittest + +import logging +import os + +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfValidation import * + +import PyJobTransforms.trfArgClasses as trfArgClasses + +from PyJobTransforms.trfValidation import eventMatch, athenaLogFileReport + +## Unittests for this module +class eventCheckTests(unittest.TestCase): + + def test_basicMatch(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 500}, + 'skipEvents': None, + 'maxEvents': None, + 'evAccEff': 0.99}) + self.assertTrue(evmatch.decide()) + + def test_matchWithSkipEvents(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 100}, + 'skipEvents': 400, + 'maxEvents': None, + 'evAccEff': 0.99}) + self.assertTrue(evmatch.decide()) + + def test_matchWithMaxEvents(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 100}, + 'skipEvents': None, + 'maxEvents': 100, + 'evAccEff': 0.99}) + self.assertTrue(evmatch.decide()) + + def test_matchWithSkipAndMaxEvents(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 100}, + 'skipEvents': 100, + 'maxEvents': 100, + 'evAccEff': 0.99}) + self.assertTrue(evmatch.decide()) + + def test_matchWithSkipAndMaxEventsOvershoot(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 100}, + 'skipEvents': 400, + 'maxEvents': 200, + 'evAccEff': 0.99}) + self.assertTrue(evmatch.decide()) + + def test_tooFewEvents(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 499}, + 'skipEvents': None, + 'maxEvents': None, + 'evAccEff': 0.99}) + self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide) + + def test_tooFewEventsWithSkipEvents(self): + evmatch = eventMatch(executor=None) + evmatch.configureCheck(override={'inEventDict': {'BS': 500}, + 'outEventDict': {'ESD': 399}, + 'skipEvents': 100, + 'maxEvents': None, + 'evAccEff': 0.99}) + 
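# 500 input events minus 100 skipped leaves 400 expected; the default
+        # BS -> ESD rule is an exact match (evAccEff only applies to pairs
+        # configured as 'minEff'), so an output of 399 must raise
+        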
self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_tooFewEventsWithMaxEvents(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'BS': 500},
+                                         'outEventDict': {'ESD': 199},
+                                         'skipEvents': None,
+                                         'maxEvents': 200,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_tooFewEventsWithSkipAndMaxEvents(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'BS': 500},
+                                         'outEventDict': {'ESD': 199},
+                                         'skipEvents': 100,
+                                         'maxEvents': 200,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_minEff(self):
+        evmatch = eventMatch(executor=None, eventCountConf={'Evgen': {'Hits': 'minEff'}})
+        evmatch.configureCheck(override={'inEventDict': {'Evgen': 1000},
+                                         'outEventDict': {'Hits': 991},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertTrue(evmatch.decide())
+
+    def test_minEffTooFew(self):
+        evmatch = eventMatch(executor=None, eventCountConf={'Evgen': {'Hits': 'minEff'}})
+        evmatch.configureCheck(override={'inEventDict': {'Evgen': 1000},
+                                         'outEventDict': {'Hits': 989},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_badConf(self):
+        evmatch = eventMatch(executor=None, eventCountConf={'Evgen': {'Hits': "unknownAction"}})
+        evmatch.configureCheck(override={'inEventDict': {'Evgen': 1000},
+                                         'outEventDict': {'Hits': 989},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_badConfEfficiency(self):
+        evmatch = eventMatch(executor=None, eventCountConf={'Evgen': {'Hits': 10.0}})
+        evmatch.configureCheck(override={'inEventDict': {'Evgen': 1000},
+                                         'outEventDict': {'Hits': 989},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_minEffRounding(self):
+        # Test that eff * inputEvents is rounded down correctly with floor()
+        evmatch = eventMatch(executor=None, eventCountConf={'Evgen': {'Hits': 'minEff'}})
+        evmatch.configureCheck(override={'inEventDict': {'Evgen': 1},
+                                         'outEventDict': {'Hits': 0},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertTrue(evmatch.decide())
+
+    def test_globDataTypeMatch(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'BS': 100},
+                                         'outEventDict': {'DESD_SGLMU': 10},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertTrue(evmatch.decide())
+
+    def test_multiData(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'BS': 100},
+                                         'outEventDict': {'ESD': 100, 'DESD_SGLMU': 10},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertTrue(evmatch.decide())
+
+    def test_multiDataFail(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'BS': 100},
+                                         'outEventDict': {'ESD': 99, 'DESD_SGLMU': 10},
+                                         'skipEvents': None,
+                                         'maxEvents': None,
+                                         'evAccEff': 0.99})
+        self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide)
+
+    def test_inputGlobWithFail(self):
+        evmatch = eventMatch(executor=None)
+        evmatch.configureCheck(override={'inEventDict': {'NTUP_ZEE': 100},
+                                         'outEventDict': 
{'NTUP_ZEE_MRG': 98}, + 'skipEvents': None, + 'maxEvents': None, + 'evAccEff': 0.99}) + self.assertRaises(trfExceptions.TransformValidationException, evmatch.decide) + + +## Unittests for this module +class athenaLogFileReportTests(unittest.TestCase): + + def setUp(self): + testLogExcerpt = ''' +16:00:59 Py:Generate INFO MetaData attributes: (None) +16:00:59 +-- AthSequencer/AthOutSeq +16:00:59 +-- AthSequencer/AthRegSeq +16:00:59 MetaData: physicsComment = Dijet truth jet slice JZ2W, with the AU2 CT10 tune +16:00:59 MetaData: generatorName = Pythia8 +16:00:59 MetaData: generatorTune = AU2 CT10 +16:00:59 MetaData: keywords = dijet, qcd +16:00:59 MetaData: specialConfig = NONE +16:00:59 MetaData: contactPhysicist = MC group +16:00:59 srcFilePrefix = /cvmfs/atlas.cern.ch/repo/sw/software/x86_64-slc6-gcc47-opt/19.0.3/AtlasSimulation/19.0.3/InstallArea/share, dirName = +16:00:59 srcFilePrefix = /cvmfs/atlas.cern.ch/repo/sw/software/x86_64-slc6-gcc47-opt/19.0.3/AtlasSimulation/19.0.3/InstallArea/share, dirName = ''' + + with open('file1', 'w') as f1: + print >> f1, 'This is test file 1 w/o meaning' + with open('file2', 'w') as f2: + print >> f2, testLogExcerpt + + self.myFileReport1 = athenaLogFileReport('file1') + self.myFileReport2 = athenaLogFileReport('file2') + + def tearDown(self): + for f in 'file1', 'file2': + try: + os.unlink(f) + except OSError: + pass + + def test_emptyScanLogMetaData(self): + self.assertEqual(self.myFileReport1._metaData, {}) + + def test_gatherScanLogMetaData(self): + resultDict = {'physicsComment': 'Dijet truth jet slice JZ2W, with the AU2 CT10 tune', + 'generatorName': 'Pythia8', + 'generatorTune': 'AU2 CT10', + 'keywords': 'dijet, qcd', + 'specialConfig': 'NONE', + 'contactPhysicist': 'MC group' + } + self.assertEqual(self.myFileReport2._metaData, resultDict) + +## Unittests for this module +if __name__ == '__main__': + unittest.main()
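+
+# A rough sketch of the metadata scan exercised above (an assumption
+# inferred from the test data, not a copy of the actual implementation):
+# every log line of the form '<time> MetaData: <key> = <value>' is expected
+# to add one entry to the report's _metaData dict, e.g.
+#   '16:00:59 MetaData: generatorName = Pythia8' -> {'generatorName': 'Pythia8'}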