diff --git a/Tools/PyJobTransforms/python/transform.py b/Tools/PyJobTransforms/python/transform.py
index 3b1c4171a2123ceebe1949756b82be1b8a931e19..bee420a05cb16bb22b474ebfddedfb02e6655840 100644
--- a/Tools/PyJobTransforms/python/transform.py
+++ b/Tools/PyJobTransforms/python/transform.py
@@ -5,7 +5,7 @@
 # @brief Main package for new style ATLAS job transforms
 # @details Core class for ATLAS job transforms
 # @author atlas-comp-transforms-dev@cern.ch
-# @version $Id: transform.py 654738 2015-03-17 14:43:07Z graemes $
+# @version $Id: transform.py 679938 2015-07-02 22:09:59Z graemes $
 #
 
 __version__ = '$Revision'
@@ -373,12 +373,15 @@ class transform(object):
 
             self.validateOutFiles()
 
+            msg.debug('Transform executor succeeded')
+            self._exitCode = 0
+            self._exitMsg = trfExit.codeToName(self._exitCode)
+
         except trfExceptions.TransformNeedCheckException as e:
             msg.warning('Transform executor signaled NEEDCHECK condition: {0}'.format(e.errMsg))
             self._exitCode = e.errCode
             self._exitMsg = e.errMsg
             self.generateReport(fast=False)
-            sys.exit(self._exitCode)
 
         except trfExceptions.TransformException as e:
             msg.critical('Transform executor raised %s: %s' % (e.__class__.__name__, e.errMsg))
@@ -386,19 +389,13 @@
             self._exitCode = e.errCode
             self._exitMsg = e.errMsg
             # Try and write a job report...
             self.generateReport(fast=True)
-            sys.exit(self._exitCode)
-
-        # As the actual executor function is not part of this class we pass the transform as an argument
-        # This means that simple executors do not require explicit subclassing
-        msg.debug('Transform executor succeeded')
-        self._exitCode = 0
-        self._exitMsg = trfExit.codeToName(self._exitCode)
-
-        # Just in case any stray processes have been left behind...
-        if ('orphanKiller' in self._argdict):
-            infanticide(message=True, listOrphans=True)
-        else:
+
+        finally:
+            # Clean up any orphaned processes and exit here if things went bad
             infanticide(message=True)
+            if self._exitCode:
+                msg.warning('Transform now exiting early with exit code {0} ({1})'.format(e.errCode, e.errMsg))
+                sys.exit(self._exitCode)
 
     ## @brief Setup the executor graph
     # @note This function might need to be called again when the number of 'substeps' is unknown
diff --git a/Tools/PyJobTransforms/python/trfAMI.py b/Tools/PyJobTransforms/python/trfAMI.py
index f9eae03de15f8ae7abbcd07d738a63c1481c1520..b4d6ab6ab1a51596a13bb8203e730d56c3601bca 100644
--- a/Tools/PyJobTransforms/python/trfAMI.py
+++ b/Tools/PyJobTransforms/python/trfAMI.py
@@ -523,7 +523,7 @@ def getTrfConfigFromAMI(tag, suppressNonJobOptions = True):
 
         if 'outputs' in result[0].keys():
             outputs=deserialiseFromAMIString(result[0]['outputs'])
-            trf.outFiles=dict( (k, getOutputFileName(outputs[k]['dstype']) ) for k in outputs.iterkeys() )
+            trf.outFiles=dict( (k, getOutputFileName(k.lstrip('output').rstrip('File')) ) for k in outputs.iterkeys() )
             trf.outfmts=[ outputs[k]['dstype'] for k in outputs.iterkeys() ]
     except KeyError as e:
         raise TransformAMIException(AMIerrorCode, "Missing key in AMI data: {0}".format(e))
diff --git a/Tools/PyJobTransforms/python/trfArgClasses.py b/Tools/PyJobTransforms/python/trfArgClasses.py
index 511b865b7bac2af032ba5ee38790448412d10357..1abbac736ed8da2404d8274485201926fbd46d41 100644
--- a/Tools/PyJobTransforms/python/trfArgClasses.py
+++ b/Tools/PyJobTransforms/python/trfArgClasses.py
@@ -3,7 +3,7 @@
 ## @package PyJobTransforms.trfArgClasses
 # @brief Transform argument class definitions
 # @author atlas-comp-transforms-dev@cern.ch
-# @version $Id: trfArgClasses.py 665892 2015-05-08 14:54:36Z graemes $
+# @version $Id: trfArgClasses.py 679938 2015-07-02 22:09:59Z graemes $
 
 import argparse
 import bz2
@@ -21,10 +21,11 @@
 msg = logging.getLogger(__name__)
 
 import PyJobTransforms.trfExceptions as trfExceptions
-from PyJobTransforms.trfFileUtils import athFileInterestingKeys, AthenaLiteFileInfo, NTUPEntries, HISTEntries, urlType, ROOTGetSize
+from PyJobTransforms.trfFileUtils import athFileInterestingKeys, AthenaLiteFileInfo, NTUPEntries, HISTEntries, urlType, ROOTGetSize, inpFileInterestingKeys
 from PyJobTransforms.trfUtils import call, cliToKey
 from PyJobTransforms.trfExitCodes import trfExit as trfExit
 from PyJobTransforms.trfDecorators import timelimited
+from PyJobTransforms.trfAMI import getAMIClient
 
 
 ## @class argFactory
@@ -225,7 +226,6 @@ class argFloat(argument):
     def __init__(self, value=None, min=None, max=None, runarg=True, name=None):
         self._min = min
         self._max = max
-        desc = {}
         super(argFloat, self).__init__(value = value, runarg = runarg, name=name)
 
     ## @brief Argument value getter
@@ -530,7 +530,7 @@ class argFile(argList):
                               'file_guid': self._generateGUID,
                               '_exists': self._exists,
                               }
-
+        self._fileMetadata = {}
         if multipleOK is None:
             if self._io is 'input':
                 self._multipleOK = True
@@ -601,14 +601,36 @@ class argFile(argList):
     # it can produce multiple output files - this is allowed by setting <tt>allowMultiOutputs = False</tt>
     # @note The setter protects against the same file being added multiple times
     def valueSetter(self, value):
-        ## @note Impossible to use the argList.value setter here? super() doesn't seem to get it right:
-        #  <tt>super(argFile, self).value = value</tt> results in an attribute error
-        prodSysPattern = re.compile(r'(?P<prefix>.*)\[(?P<expand>[\d\.,_]+)\](?P<suffix>.*)')
-
+        ## @note First do parsing of string vs. lists to get list of files
         if isinstance(value, (list, tuple)):
-            self._value = list(value)
+            if len(value) > 0 and isinstance(value[0], dict): # Tier-0 style expanded argument with metadata
+                self._value=[]
+                for myfile in value:
+                    try:
+                        self._value.append(myfile['lfn'])
+                        self._resetMetadata(files = [myfile['lfn']])
+                    except KeyError:
+                        raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'),
+                                                                  'Filename (key "lfn") not found in Tier-0 file dictionary: {0}'.format(myfile))
+                    for k, v in myfile.iteritems():
+                        if k == 'guid':
+                            self._setMetadata([myfile['lfn']], {'file_guid': v})
+                        elif k == 'events':
+                            self._setMetadata([myfile['lfn']], {'nentries': v})
+                        elif k == 'checksum':
+                            self._setMetadata([myfile['lfn']], {'checksum': v})
+                        elif k == 'dsn':
+                            if not self._dataset:
+                                self.dataset = v
+                            elif self.dataset != v:
+                                raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_DATASET'),
+                                                                          'Inconsistent dataset names in Tier-0 dictionary: {0} != {1}'.format(self.dataset, v))
+            else:
+                self._value = list(value)
+                self._getDatasetFromFilename(reset = False)
+                self._resetMetadata()
         elif value==None:
             self._value = []
             return
@@ -621,10 +643,12 @@ class argFile(argList):
                 self._value = [value]
             else:
                 self._value = value.split(self._splitter)
+                self._getDatasetFromFilename(reset = False)
+                self._resetMetadata()
         except (AttributeError, TypeError):
             raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_CONV_FAIL'),
                                                       'Failed to convert %s to a list' % str(value))
-
+
         ## @note Check for duplicates (N.B.
preserve the order, just remove the duplicates) deDuplicatedValue = [] for fname in self._value: @@ -636,10 +660,6 @@ class argFile(argList): self._value = deDuplicatedValue msg.warning('File list after duplicate removal: {0}'.format(self._value)) - ## @note Now look for dataset notation - # TODO - handle reset of filenames from AthenaMP without trashing DS name - self._getDatasetFromFilename(reset = True) - # Find our URL type (if we actually have files!) # At the moment this is assumed to be the same for all files in this instance # although in principle one could mix different access methods in the one input file type @@ -679,7 +699,9 @@ class argFile(argList): newValue.extend(globbedNames) else: # Simple case - newValue.extend(glob.glob(filename)) + globbedFiles = glob.glob(filename) + globbedFiles.sort() + newValue.extend(globbedFiles) if len(self._value) > 0 and len(newValue) is 0: # Woops - no files! raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), @@ -789,9 +811,6 @@ class argFile(argList): raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), 'Multiple file arguments are not supported for {0} (was given: {1}'.format(self, self._value)) - # Reset the self._fileMetadata dictionary - self._resetMetadata() - @property def io(self): return (self._io) @@ -875,7 +894,7 @@ class argFile(argList): def getnentries(self, fast=False): totalEvents = 0 for fname in self._value: - events = self.getSingleMetadata(fname, 'nentries', populate = not fast) + events = self.getSingleMetadata(fname=fname, metadataKey='nentries', populate = not fast) if events is None: msg.debug('Got events=None for file {0} - returning None for this instance'.format(fname)) return None @@ -1031,6 +1050,8 @@ class argFile(argList): if files == None: files = self._value for fname in files: + if fname not in self._fileMetadata: + self._fileMetadata[fname] = {} for k, v in metadataKeys.iteritems(): msg.debug('Manualy setting {0} for file {1} to {2}'.format(k, fname, v)) self._fileMetadata[fname][k] = v @@ -1066,10 +1087,11 @@ class argFile(argList): ## @brief Look for dataset name in dataset#filename Tier0 convention # @detail At the moment all files must be in the same dataset if it's specified. - # To change this dataset will need to become a per-file metadatum. - # @param @c reset If @c True then forget previous dataset setting. Default is @c True. + # (To change this dataset will need to become a per-file metadatum.) + # @note dsn#lfn notation must be used for @b all input values and all dsn values must be the same + # @param @c reset If @c True then forget previous dataset setting. Default is @c False. # @return @c None. Side effect is to set @c self._metadata. 
- def _getDatasetFromFilename(self, reset = True): + def _getDatasetFromFilename(self, reset = False): if reset: self._dataset = None newValue = [] @@ -1078,12 +1100,15 @@ class argFile(argList): (dataset, fname) = filename.split('#', 1) newValue.append(fname) msg.debug('Current dataset: {0}; New dataset {1}'.format(self._dataset, dataset)) - if (self._dataset is not None) and (self._dataset != dataset): + if self._dataset and (self._dataset != dataset): raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_DATASET'), 'Found inconsistent dataset assignment in argFile setup: %s != %s' % (self._dataset, dataset)) self._dataset = dataset - else: - newValue.append(filename) + if len(newValue) == 0: + return + elif len(newValue) != len (self._value): + raise trfExceptions.TransformArgException(trfExit.nameToCode('TRF_ARG_DATASET'), + 'Found partial dataset assignment in argFile setup from {0} (dsn#lfn notation must be uniform for all inputs)'.format(self._value)) self._value = newValue ## @brief Determines the size of files. @@ -1165,6 +1190,33 @@ class argFile(argList): ## @brief String representation of a file argument def __str__(self): return "{0}={1} (Type {2}, Dataset {3}, IO {4})".format(self.name, self.value, self.type, self.dataset, self.io) + + + ## @brief Utility to strip arguments which should not be passed to the selfMerge methods + # of our child classes + # @param copyArgs If @c None copy all arguments by default, otherwise only copy the + # listed keys + def _mergeArgs(self, argdict, copyArgs=None): + if copyArgs: + myargdict = {} + for arg in copyArgs: + if arg in argdict: + myargdict[arg] = copy.copy(argdict[arg]) + + else: + myargdict = copy.copy(argdict) + # Never do event count checks for self merging + myargdict['checkEventCount'] = argSubstepBool('False', runarg=False) + if 'athenaopts' in myargdict: + # Need to ensure that "nprocs" is not passed to merger + newopts = [] + for opt in myargdict['athenaopts'].value: + if opt.startswith('--nprocs'): + continue + newopts.append(opt) + myargdict['athenaopts'] = argList(newopts, runarg=False) + return myargdict + ## @brief Athena file class # @details Never used directly, but is the parent of concrete classes @@ -1192,9 +1244,12 @@ class argAthenaFile(argFile): elif self._type.upper() in ('TAG'): aftype = 'TAG' + # retrieve GUID and nentries without runMiniAthena subprocess for input POOL files + if aftype == 'POOL' and self._io == 'input': + retrieveKeys = inpFileInterestingKeys + # N.B. 
Could parallelise here
         for fname in myFiles:
-            # athFileMetadata = AthenaLiteFileInfo(fname, aftype, retrieveKeys=retrieveKeys, timeout=240+30*len(myFiles), defaultrc=None)
             athFileMetadata = AthenaLiteFileInfo(fname, aftype, retrieveKeys=retrieveKeys)
             if athFileMetadata == None:
                 raise trfExceptions.TransformMetadataException(trfExit.nameToCode('TRF_METADATA_CALL_FAIL'), 'Call to AthenaFileInfo failed')
@@ -1205,31 +1260,6 @@
     def _getAthInfo(self, files):
         self._callAthInfo(files, doAllFiles = True, retrieveKeys=athFileInterestingKeys)
 
-    ## @brief Utility to strip arguments which should not be passed to the selfMerge methods
-    # of our child classes
-    # @param copyArgs If @c None copy all arguments by default, otherwise only copy the
-    # listed keys
-    def _mergeArgs(self, argdict, copyArgs=None):
-        if copyArgs:
-            myargdict = {}
-            for arg in copyArgs:
-                if arg in argdict:
-                    myargdict[arg] = copy.copy(argdict[arg])
-
-        else:
-            myargdict = copy.copy(argdict)
-        # Never do event count checks for self merging
-        myargdict['checkEventCount'] = argSubstepBool('False', runarg=False)
-        if 'athenaopts' in myargdict:
-            # Need to ensure that "nprocs" is not passed to merger
-            newopts = []
-            for opt in myargdict['athenaopts'].value:
-                if opt.startswith('--nprocs'):
-                    continue
-                newopts.append(opt)
-            myargdict['athenaopts'] = argList(newopts, runarg=False)
-        return myargdict
-
     @property
     def prodsysDescription(self):
         desc=super(argAthenaFile, self).prodsysDescription
@@ -1257,6 +1287,47 @@ class argBSFile(argAthenaFile):
         desc=super(argBSFile, self).prodsysDescription
         return desc
 
+    ## @brief Method which can be used to merge files of this type
+    # @param output Target filename for this merge
+    # @param inputs List of files to merge
+    # @param argdict argdict of the transform
+    # @note @c argdict is not normally used as this is a @em vanilla merge
+    def selfMerge(self, output, inputs, argdict={}):
+        msg.debug('selfMerge attempted for {0} -> {1} with {2}'.format(inputs, output, argdict))
+
+        # First do a little sanity check
+        for fname in inputs:
+            if fname not in self._value:
+                raise trfExceptions.TransformMergeException(trfExit.nameToCode('TRF_FILEMERGE_PROBLEM'),
+                                                            "File {0} is not part of this argument: {1}".format(fname, self))
+
+        from PyJobTransforms.trfExe import bsMergeExecutor, executorConfig
+
+        ## @note Modify argdict
+        myargdict = self._mergeArgs(argdict)
+        myargdict['maskEmptyInputs'] = argBool(True)
+        myargdict['allowRename'] = argBool(True)
+        myargdict['emptyStubFile'] = argString(output)
+
+        # We need an athenaExecutor to do the merge
+        # N.B. We never hybrid merge AthenaMP outputs as this would prevent further merging in another
+        # task (hybrid merged files cannot be further hybrid merged)
+        myDataDictionary = {'BS_MRG_INPUT' : argBSFile(inputs, type=self.type, io='input'),
+                            'BS_MRG_OUTPUT' : argBSFile(output, type=self.type, io='output')}
+        myMergeConf = executorConfig(myargdict, myDataDictionary, disableMP=True)
+        myMerger = bsMergeExecutor(name='BSMerge_AthenaMP.{0}'.format(self._subtype), conf=myMergeConf, exe = 'file_merging',
+                                   inData=set(['BS_MRG_INPUT']), outData=set(['BS_MRG_OUTPUT']))
+        myMerger.doAll(input=set(['BS_MRG_INPUT']), output=set(['BS_MRG_OUTPUT']))
+
+        # OK, if we got to here with no exceptions, we're in good shape
+        # Now update our own list of files to reflect the merge
+        for fname in inputs:
+            self._value.remove(fname)
+        self._value.append(output)
+
+        msg.debug('Post self-merge files are: {0}'.format(self._value))
+        self._resetMetadata(inputs + [output])
+
 
 ## @brief POOL file class.
 # @details Works for all POOL files
@@ -2063,6 +2134,36 @@ class argSubstepSteering(argSubstep):
                 retvalue.append((matchedParts.group(1), matchedParts.group(2), matchedParts.group(3)))
         return retvalue
 
+
+## @brief Substep class for conditionsTag
+class argSubstepConditions(argSubstep):
+    @property
+    def value(self):
+        return self._value
+
+    @value.setter
+    def value(self, value):
+        msg.debug('Attempting to set argSubstepConditions from {0!s} (type {1})'.format(value, type(value)))
+        # super().value = value workaround:
+        super(self.__class__, self.__class__).value.fset(self, value)
+
+        current = None
+        for k, v in self._value.iteritems():
+            if "CurrentMC" == v:
+                if current == None:
+                    current = self._amiLookUp(getAMIClient())
+                self._value[k] = current
+
+    def _amiLookUp(self, client):
+        cmd = "COMAGetGlobalTagNameByCurrentState --state=CurrentMC"
+        return str(client.execute(cmd, format = 'dom_object').get_rows().pop()['globalTag'])
+
+    @property
+    def prodsysDescription(self):
+        desc = {'type': 'substep', 'substeptype': 'str', 'separator': self._separator,
+                'default': self._defaultSubstep}
+        return desc
+
 
 class trfArgParser(argparse.ArgumentParser):
diff --git a/Tools/PyJobTransforms/python/trfArgs.py b/Tools/PyJobTransforms/python/trfArgs.py
index 59953a8e75189fab8308e76a9859bb0455247c83..3ec5eac6065a592f29dff3f329d4786989cd9f17 100644
--- a/Tools/PyJobTransforms/python/trfArgs.py
+++ b/Tools/PyJobTransforms/python/trfArgs.py
@@ -3,7 +3,7 @@
 ## @Package PyJobTransforms.trfArgs
 # @brief Standard arguments supported by trf infrastructure
 # @author atlas-comp-transforms-dev@cern.ch
-# @version $Id: trfArgs.py 652372 2015-03-06 22:13:05Z graemes $
+# @version $Id: trfArgs.py 682012 2015-07-10 07:44:44Z graemes $
 
 import logging
 msg = logging.getLogger(__name__)
@@ -25,8 +25,6 @@ def addStandardTrfArgs(parser):
     parser.add_argument('--showSteps', action='store_true', help='Show list of executor steps only, then exit')
     parser.add_argument('--dumpPickle', metavar='FILE', help='Interpret command line arguments and write them out as a pickle file')
     parser.add_argument('--dumpJSON', metavar='FILE', help='Interpret command line arguments and write them out as a JSON file')
-    parser.add_argument('--orphanKiller', action='store_true', help="Kill all orphaned children at the end of a job (that is, sharing the transform's pgid, but with ppid=1)."
- "Beware, this is potentially dangerous in a a batch environment") parser.add_argument('--reportName', type=argFactory(trfArgClasses.argString, runarg=False), help='Base name for job reports (default name is "jobReport" for most reports, but "metadata" for classic prodsys XML)') parser.add_argument('--reportType', type=argFactory(trfArgClasses.argList, runarg=False), nargs='+', metavar='TYPE', @@ -81,8 +79,8 @@ def addAthenaArguments(parser, maxEventsDefaultSubstep='first', addValgrind=True parser.add_argument('--maxEvents', group='Athena', type=argFactory(trfArgClasses.argSubstepInt, defaultSubstep=maxEventsDefaultSubstep), nargs='+', metavar='substep:maxEvents', help='Set maximum events for each processing step (default substep is "{0}")'.format(maxEventsDefaultSubstep)) - parser.add_argument('--skipEvents', group='Athena', type=argFactory(trfArgClasses.argSubstepInt, defaultSubstep='first'), - help='Number of events to skip over in the first processing step') + parser.add_argument('--skipEvents', group='Athena', nargs='+', type=argFactory(trfArgClasses.argSubstepInt, defaultSubstep='first'), + help='Number of events to skip over in the first processing step (skipping substep can be overridden)') parser.add_argument('--asetup', group='Athena', type=argFactory(trfArgClasses.argSubstep, runarg=False), nargs='+', metavar='substep:ASETUP', help='asetup command string to be run before this substep is executed') parser.add_argument('--eventAcceptanceEfficiency', type=trfArgClasses.argFactory(trfArgClasses.argSubstepFloat, min=0.0, max=1.0, runarg=False), @@ -146,7 +144,7 @@ def addDetectorArguments(parser): parser.defineArgGroup('Detector', 'General detector configuration options, for simulation and reconstruction') parser.add_argument('--DBRelease', group = 'Detector', type=argFactory(trfArgClasses.argSubstep, runarg=False), metavar='substep:DBRelease', nargs='+', help='Use DBRelease instead of ORACLE. Give either a DBRelease tarball file (e.g., DBRelease-21.7.1.tar.gz) or cvmfs DBRelease directory (e.g., 21.7.1 or current') - parser.add_argument('--conditionsTag', group='Detector', type=argFactory(trfArgClasses.argSubstep), metavar='substep:CondTag', nargs='+', + parser.add_argument('--conditionsTag', group='Detector', type=argFactory(trfArgClasses.argSubstepConditions), metavar='substep:CondTag', nargs='+', help='Conditions tag to set') parser.add_argument('--geometryVersion', group='Detector', type=argFactory(trfArgClasses.argSubstep), metavar='substep:GeoVersion', nargs='+', help='ATLAS geometry version tag') @@ -316,7 +314,8 @@ class dpdType(object): # @param argclass The argument class to be used for this data # @param treeNames For DPD types only, the tree(s) used for event counting (if @c None then # no event counting can be done. 
- def __init__(self, name, type = None, substeps = [], argclass = None, treeNames = None): + # @param help Help string to generate for this argument + def __init__(self, name, type = None, substeps = [], argclass = None, treeNames = None, help = None): self._name = name ## @note Not very clear how useful this actually is, but we @@ -361,6 +360,7 @@ class dpdType(object): else: self._argclass = argclass + self._help = help self._treeNames = treeNames @property @@ -379,6 +379,10 @@ class dpdType(object): def argclass(self): return self._argclass + @property + def help(self): + return self._help + @property def treeNames(self): return self._treeNames @@ -398,8 +402,16 @@ def getExtraDPDList(NTUPOnly = False): extraDPDs.append(dpdType('NTUP_SUSYTRUTH', substeps=['a2d'], treeNames=['truth'])) extraDPDs.append(dpdType('NTUP_HIGHMULT', substeps=['e2a'], treeNames=['MinBiasTree'])) extraDPDs.append(dpdType('NTUP_PROMPTPHOT', substeps=['e2d', 'a2d'], treeNames=["PAUReco","HggUserData"])) - - if not NTUPOnly: + + extraDPDs.append(dpdType('NTUP_MCPTP', substeps=['a2d'], help="Ntuple file for MCP Tag and Probe")) + extraDPDs.append(dpdType('NTUP_MCPScale', substeps=['a2d'], help="Ntuple file for MCP scale calibration")) + + # Trigger NTUPs (for merging only!) + if NTUPOnly: + extraDPDs.append(dpdType('NTUP_TRIGCOST', treeNames=['trig_cost'])) + extraDPDs.append(dpdType('NTUP_TRIGRATE', treeNames=['trig_cost'])) + extraDPDs.append(dpdType('NTUP_TRIGEBWGHT', treeNames=['trig_cost'])) + else: extraDPDs.append(dpdType('DAOD_HSG2')) extraDPDs.append(dpdType('DESDM_ZMUMU')) @@ -416,7 +428,7 @@ def getExtraDPDList(NTUPOnly = False): def addExtraDPDTypes(parser, pick=None, transform=None, multipleOK=False, NTUPMergerArgs = False): parser.defineArgGroup('Additional DPDs', 'Extra DPD file types') - extraDPDs = getExtraDPDList() + extraDPDs = getExtraDPDList(NTUPOnly=NTUPMergerArgs) if NTUPMergerArgs: for dpd in extraDPDs: @@ -425,11 +437,11 @@ def addExtraDPDTypes(parser, pick=None, transform=None, multipleOK=False, NTUPMe parser.add_argument('--input' + dpd.name + 'File', type=argFactory(dpd.argclass, multipleOK=True, io='input', type=dpd.type, treeNames=dpd.treeNames), group = 'Additional DPDs', metavar=dpd.name.upper(), nargs='+', - help='DPD input {0} file'.format(dpd.name)) + help=dpd.help if dpd.help else 'DPD input {0} file'.format(dpd.name)) parser.add_argument('--output' + dpd.name + '_MRGFile', type=argFactory(dpd.argclass, multipleOK=multipleOK, type=dpd.type, treeNames=dpd.treeNames), group = 'Additional DPDs', metavar=dpd.name.upper(), - help='DPD output merged {0} file'.format(dpd.name)) + help=dpd.help if dpd.help else 'DPD output merged {0} file'.format(dpd.name)) pass else: @@ -441,12 +453,12 @@ def addExtraDPDTypes(parser, pick=None, transform=None, multipleOK=False, NTUPMe parser.add_argument('--output' + dpd.name + 'File', type=argFactory(dpd.argclass, multipleOK=multipleOK, type=dpd.type, treeNames=dpd.treeNames), group = 'Additional DPDs', metavar=dpd.name.upper(), - help='DPD output {0} file'.format(dpd.name)) + help=dpd.help if dpd.help else 'DPD output {0} file'.format(dpd.name)) else: parser.add_argument('--output' + dpd.name + 'File', type=argFactory(dpd.argclass, multipleOK=multipleOK, type=dpd.type), group = 'Additional DPDs', metavar=dpd.name.upper(), - help='DPD output {0} file'.format(dpd.name)) + help=dpd.help if dpd.help else 'DPD output {0} file'.format(dpd.name)) if transform: for executor in transform.executors: if hasattr(executor, 'substep') and executor.substep in 
dpd.substeps: @@ -457,13 +469,6 @@ def addExtraDPDTypes(parser, pick=None, transform=None, multipleOK=False, NTUPMe def addFileValidationArguments(parser): parser.defineArgGroup('File Validation', 'Standard file validation switches') - parser.add_argument('--skipFileValidation', '--omitFileValidation', action='store_true', - group='File Validation', help='DEPRECATED. Use --fileValidation BOOL instead') - parser.add_argument('--skipInputFileValidation', '--omitInputFileValidation', action='store_true', - group='File Validation', help='DEPRECATED. Use --inputFileValidation BOOL instead') - parser.add_argument('--skipOutputFileValidation', '--omitOutputFileValidation', action='store_true', - group='File Validation', help='DEPRECATED. Use --outputFileValidation BOOL instead') - parser.add_argument('--fileValidation', type = argFactory(trfArgClasses.argBool), metavar='BOOL', group='File Validation', help='If FALSE skip both input and output file validation (default TRUE; warning - do not use this option in production jobs!)') parser.add_argument('--inputFileValidation', type = argFactory(trfArgClasses.argBool), metavar='BOOL', diff --git a/Tools/PyJobTransforms/python/trfExe-bak.py b/Tools/PyJobTransforms/python/trfExe-bak.py deleted file mode 100755 index 6a5c83748872e819e366efae526842e86ea86aaf..0000000000000000000000000000000000000000 --- a/Tools/PyJobTransforms/python/trfExe-bak.py +++ /dev/null @@ -1,1602 +0,0 @@ -# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration - -## @package PyJobTransforms.trfExe -# -# @brief Transform execution functions -# @details Standard transform executors -# @author atlas-comp-transforms-dev@cern.ch -# @version $Id: trfExe.py 643045 2015-01-30 13:43:56Z graemes $ - -import copy -import math -import os -import os.path as path -import re -import shutil -import subprocess -import sys -import time - -from xml.etree import ElementTree - -import logging -msg = logging.getLogger(__name__) - -from PyJobTransforms.trfJobOptions import JobOptionsTemplate -from PyJobTransforms.trfUtils import asetupReport, unpackDBRelease, setupDBRelease, cvmfsDBReleaseCheck, forceToAlphaNum, releaseIsOlderThan, ValgrindCommand -from PyJobTransforms.trfExitCodes import trfExit -from PyJobTransforms.trfLogger import stdLogLevels - - -import PyJobTransforms.trfExceptions as trfExceptions -import PyJobTransforms.trfValidation as trfValidation -import PyJobTransforms.trfArgClasses as trfArgClasses -import PyJobTransforms.trfEnv as trfEnv - -## @note This class contains the configuration information necessary to run an executor. -# In most cases this is simply a collection of references to the parent transform, however, -# abstraction is done via an instance of this class so that 'lightweight' executors can -# be run for auxiliary purposes (e.g., file merging after AthenaMP was used, where the merging -# is outside of the main workflow, but invoked in the main executor's "postExecute" method). 
-class executorConfig(object): - - ## @brief Configuration for an executor - # @param argdict Argument dictionary for this executor - # @param dataDictionary Mapping from input data names to argFile instances - # @param firstExecutor Boolean set to @c True if we are the first executor - # @param disableMP Ensure that AthenaMP is not used (i.e., also unset - # @c ATHENA_PROC_NUMBER before execution) - def __init__(self, argdict={}, dataDictionary={}, firstExecutor=False, disableMP=False): - self._argdict = argdict - self._dataDictionary = dataDictionary - self._firstExecutor = firstExecutor - self._disableMP = disableMP - - @property - def argdict(self): - return self._argdict - - @argdict.setter - def argdict(self, value): - self._argdict = value - - @property - def dataDictionary(self): - return self._dataDictionary - - @dataDictionary.setter - def dataDictionary(self, value): - self._dataDictionary = value - - @property - def firstExecutor(self): - return self._firstExecutor - - @firstExecutor.setter - def firstExecutor(self, value): - self._firstExecutor = value - - @property - def disableMP(self): - return self._disableMP - - @disableMP.setter - def disableMP(self, value): - self._disableMP = value - - ## @brief Set configuration properties from the parent transform - # @note It's not possible to set firstExecutor here as the transform holds - # the name of the first executor, which we don't know... (should we?) - def setFromTransform(self, trf): - self._argdict = trf.argdict - self._dataDictionary = trf.dataDictionary - - ## @brief Add a new object to the argdict - def addToArgdict(self, key, value): - self._argdict[key] = value - - ## @brief Add a new object to the dataDictionary - def addToDataDictionary(self, key, value): - self._dataDictionary[key] = value - - -## Executors always only even execute a single step, as seen by the transform -class transformExecutor(object): - - ## @brief Base class initaliser for transform executors - # @param name Transform name - # @param trf Parent transform - # @param conf executorConfig object (if @None then set from the @c trf directly) - # @param inData Data inputs this transform can start from. This should be a list, tuple or set - # consisting of each input data type. If a tuple (or list) is passed as a set member then this is interpreted as - # meaning that all of the data members in that tuple are necessary as an input. - # @note Curiously, sets are not allowed to be members of sets (they are not hashable, so no sub-sets) - # @param outData List of outputs this transform can produce (list, tuple or set can be used) - def __init__(self, name = 'Dummy', trf = None, conf = None, inData = set(), outData = set()): - # Some information to produce helpful log messages - self._name = forceToAlphaNum(name) - - # Data this executor can start from and produce - # Note we transform NULL to inNULL and outNULL as a convenience - self._inData = set(inData) - self._outData = set(outData) - if 'NULL' in self._inData: - self._inData.remove('NULL') - self._inData.add('inNULL') - if 'NULL' in self._outData: - self._outData.remove('NULL') - self._outData.add('outNULL') - - # It's forbidden for an executor to consume and produce the same datatype - dataOverlap = self._inData & self._outData - if len(dataOverlap) > 0: - raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_ERROR'), - 'Executor definition error, executor {0} is not allowed to produce and consume the same datatypes. 
Duplicated input/output types {1}'.format(self._name, ' '.join(dataOverlap))) - - ## Executor configuration: - # @note that if conf and trf are @c None then we'll probably set the conf up later (this is allowed and - # expected to be done once the master transform has figured out what it's doing for this job) - if conf is not None: - self.conf = conf - else: - self.conf = executorConfig() - if trf is not None: - self.conf.setFromTransform(trf) - - # Execution status - self._hasExecuted = False - self._rc = -1 - self._errMsg = None - - # Validation status - self._hasValidated = False - self._isValidated = False - - # Extra metadata - # This dictionary holds extra metadata for this executor which will be - # provided in job reports - self._extraMetadata = {} - - ## @note Place holders for resource consumption. CPU and walltime are available for all executors - # but currently only athena is instrumented to fill in memory stats (and then only if PerfMonSD is - # enabled). - self._exeStart = self._exeStop = None - self._memStats = {} - - - ## Now define properties for these data members - @property - def name(self): - return self._name - - @property - def substep(self): - if '_substep' in dir(self): - return self._substep - return None - - @property - def trf(self): - if '_trf' in dir(self): - return self._trf - return None - - @trf.setter - def trf(self, value): - self._trf = value - - @property - def inData(self): - ## @note Might not be set in all executors... - if '_inData' in dir(self): - return self._inData - return None - - @inData.setter - def inData(self, value): - self._inData = set(value) - - def inDataUpdate(self, value): - ## @note Protect against _inData not yet being defined - if '_inData' in dir(self): - self._inData.update(value) - else: - ## @note Use normal setter - self.inData = value - - - @property - def outData(self): - ## @note Might not be set in all executors... - if '_outData' in dir(self): - return self._outData - return None - - @outData.setter - def outData(self, value): - self._outData = set(value) - - def outDataUpdate(self, value): - ## @note Protect against _outData not yet being defined - if '_outData' in dir(self): - self._outData.update(value) - else: - ## @note Use normal setter - self.outData = value - - @property - ## @note This returns the @b actual input data with which this executor ran - # (c.f. @c inData which returns all the possible data types this executor could run with) - def input(self): - ## @note Might not be set in all executors... - if '_input' in dir(self): - return self._input - return None - - @property - ## @note This returns the @b actual output data with which this executor ran - # (c.f. @c outData which returns all the possible data types this executor could run with) - def output(self): - ## @note Might not be set in all executors... - if '_output' in dir(self): - return self._output - return None - - @property - def extraMetadata(self): - return self._extraMetadata - - @property - def hasExecuted(self): - return self._hasExecuted - - @property - def rc(self): - return self._rc - - @property - def errMsg(self): - return self._errMsg - - @property - def validation(self): - return self._validation - - @validation.setter - def validation(self, value): - self._validation = value - - @property - def hasValidated(self): - return self._hasValidated - - @property - def isValidated(self): - return self._isValidated - - ## @note At the moment only athenaExecutor sets this property, but that might be changed... 
- @property - def first(self): - if hasattr(self, '_first'): - return self._first - else: - return None - - @property - def exeStartTimes(self): - return self._exeStart - - @property - def exeStopTimes(self): - return self._exeStop - - @property - def cpuTime(self): - if self._exeStart and self._exeStop: - return int(reduce(lambda x1, x2: x1+x2, map(lambda x1, x2: x2-x1, self._exeStart[2:4], self._exeStop[2:4])) + 0.5) - else: - return None - - @property - def usrTime(self): - if self._exeStart and self._exeStop: - return int(self._exeStop[2] - self._exeStart[2] + 0.5) - else: - return None - - @property - def sysTime(self): - if self._exeStart and self._exeStop: - return int(self._exeStop[3] - self._exeStart[3] + 0.5) - else: - return None - - @property - def wallTime(self): - if self._exeStart and self._exeStop: - return int(self._exeStop[4] - self._exeStart[4] + 0.5) - else: - return None - - @property - def memStats(self): - return self._memStats - - - def preExecute(self, input = set(), output = set()): - msg.info('Preexecute for %s' % self._name) - - def execute(self): - self._exeStart = os.times() - msg.info('Starting execution of %s' % self._name) - self._hasExecuted = True - self._rc = 0 - self._errMsg = '' - msg.info('%s executor returns %d' % (self._name, self._rc)) - self._exeStop = os.times() - - def postExecute(self): - msg.info('Postexecute for %s' % self._name) - - def validate(self): - self._hasValidated = True - msg.info('Executor %s has no validation function - assuming all ok' % self._name) - self._isValidated = True - self._errMsg = '' - - ## Convenience function - def doAll(self, input=set(), output=set()): - self.preExecute(input, output) - self.execute() - self.postExecute() - self.validate() - -## @brief Special executor that will enable a logfile scan as part of its validation -class logscanExecutor(transformExecutor): - def __init__(self, name = 'Logscan'): - super(logscanExecutor, self).__init__(name=name) - self._errorMaskFiles = None - self._logFileName = None - - def preExecute(self, input = set(), output = set()): - msg.info('Preexecute for %s' % self._name) - if 'logfile' in self.conf.argdict: - self._logFileName = self.conf.argdict['logfile'].value - - def validate(self): - msg.info("Starting validation for {0}".format(self._name)) - if self._logFileName: - ## TODO: This is a cut'n'paste from the athenaExecutor - # We really should factorise this and use it commonly - if 'ignorePatterns' in self.conf.argdict: - igPat = self.conf.argdict['ignorePatterns'].value - else: - igPat = [] - if 'ignoreFiles' in self.conf.argdict: - ignorePatterns = trfValidation.ignorePatterns(files = self.conf.argdict['ignoreFiles'].value, extraSearch=igPat) - elif self._errorMaskFiles is not None: - ignorePatterns = trfValidation.ignorePatterns(files = self._errorMaskFiles, extraSearch=igPat) - else: - ignorePatterns = trfValidation.ignorePatterns(files = athenaExecutor._defaultIgnorePatternFile, extraSearch=igPat) - - # Now actually scan my logfile - msg.info('Scanning logfile {0} for errors'.format(self._logFileName)) - self._logScan = trfValidation.athenaLogFileReport(logfile = self._logFileName, ignoreList = ignorePatterns) - worstError = self._logScan.worstError() - - # In general we add the error message to the exit message, but if it's too long then don't do - # that and just say look in the jobReport - if worstError['firstError']: - if len(worstError['firstError']['message']) > athenaExecutor._exitMessageLimit: - if 'CoreDumpSvc' in worstError['firstError']['message']: - 
exitErrorMessage = "Core dump at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine']) - elif 'G4Exception' in worstError['firstError']['message']: - exitErrorMessage = "G4 exception at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine']) - else: - exitErrorMessage = "Long {0} message at line {1} (see jobReport for further details)".format(worstError['level'], worstError['firstError']['firstLine']) - else: - exitErrorMessage = "Logfile error in {0}: \"{1}\"".format(self._logFileName, worstError['firstError']['message']) - else: - exitErrorMessage = "Error level {0} found (see athena logfile for details)".format(worstError['level']) - - # Very simple: if we get ERROR or worse, we're dead, except if ignoreErrors=True - if worstError['nLevel'] == stdLogLevels['ERROR'] and ('ignoreErrors' in self.conf.argdict and self.conf.argdict['ignoreErrors'].value is True): - msg.warning('Found ERRORs in the logfile, but ignoring this as ignoreErrors=True (see jobReport for details)') - elif worstError['nLevel'] >= stdLogLevels['ERROR']: - self._isValidated = False - msg.error('Fatal error in athena logfile (level {0})'.format(worstError['level'])) - raise trfExceptions.TransformLogfileErrorException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), - 'Fatal error in athena logfile: "{0}"'.format(exitErrorMessage)) - - # Must be ok if we got here! - msg.info('Executor {0} has validated successfully'.format(self.name)) - self._isValidated = True - self._errMsg = '' - - -class echoExecutor(transformExecutor): - def __init__(self, name = 'Echo', trf = None): - - # We are only changing the default name here - super(echoExecutor, self).__init__(name=name, trf=trf) - - - def execute(self): - self._exeStart = os.times() - msg.info('Starting execution of %s' % self._name) - msg.info('Transform argument dictionary now follows:') - for k, v in self.conf.argdict.iteritems(): - print "%s = %s" % (k, v) - self._hasExecuted = True - self._rc = 0 - self._errMsg = '' - msg.info('%s executor returns %d' % (self._name, self._rc)) - self._exeStop = os.times() - - -class scriptExecutor(transformExecutor): - def __init__(self, name = 'Script', trf = None, conf = None, inData = set(), outData = set(), exe = None, exeArgs = None): - # Name of the script we want to execute - self._exe = exe - - # With arguments (currently this means paste in the corresponding _argdict entry) - self._exeArgs = exeArgs - - super(scriptExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData) - - self._extraMetadata.update({'script' : exe}) - - # Decide if we echo script output to stdout - self._echoOutput = False - - # Can either be written by base class or child - self._cmd = None - - # Do I memory monitor my child? 
- self._memoryMonitor = True - self._memoryMonitorInterval = 10 - - @property - def exe(self): - return self._exe - - @exe.setter - def exe(self, value): - self._exe = value - self._extraMetadata['script'] = value - - @property - def exeArgs(self): - return self._exeArgs - - @exeArgs.setter - def exeArgs(self, value): - self._exeArgs = value -# self._extraMetadata['scriptArgs'] = value - - def preExecute(self, input = set(), output = set()): - msg.debug('scriptExecutor: Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - self._input = input - self._output = output - - ## @note If an inherited class has set self._cmd leave it alone - if self._cmd is None: - self._buildStandardCommand() - msg.info('Will execute script as %s', self._cmd) - - # Define this here to have it for environment detection messages - self._logFileName = "log.{0}".format(self._name) - - ## @note Query the environment for echo configuration - # Let the manual envars always win over auto-detected settings - if 'TRF_ECHO' in os.environ: - msg.info('TRF_ECHO envvar is set - enabling command echoing to stdout') - self._echoOutput = True - elif 'TRF_NOECHO' in os.environ: - msg.info('TRF_NOECHO envvar is set - disabling command echoing to stdout') - self._echoOutput = False - # PS1 is for sh, bash; prompt is for tcsh and zsh - elif 'PS1' in os.environ or 'prompt' in os.environ: - msg.info('Interactive environment detected (shell prompt) - enabling command echoing to stdout') - self._echoOutput = True - elif os.isatty(sys.stdout.fileno()) or os.isatty(sys.stdin.fileno()): - msg.info('Interactive environment detected (stdio or stdout is a tty) - enabling command echoing to stdout') - self._echoOutput = True - elif 'TZHOME' in os.environ: - msg.info('Tier-0 environment detected - enabling command echoing to stdout') - self._echoOutput = True - if self._echoOutput == False: - msg.info('Batch/grid running - command outputs will not be echoed. Logs for {0} are in {1}'.format(self._name, self._logFileName)) - - # Now setup special loggers for logging execution messages to stdout and file - self._echologger = logging.getLogger(self._name) - self._echologger.setLevel(logging.INFO) - self._echologger.propagate = False - - self._exeLogFile = logging.FileHandler(self._logFileName, mode='w') - self._exeLogFile.setFormatter(logging.Formatter('%(asctime)s %(message)s', datefmt='%H:%M:%S')) - self._echologger.addHandler(self._exeLogFile) - - if self._echoOutput: - self._echostream = logging.StreamHandler(sys.stdout) - self._echostream.setFormatter(logging.Formatter('%(name)s %(asctime)s %(message)s', datefmt='%H:%M:%S')) - self._echologger.addHandler(self._echostream) - - def _buildStandardCommand(self): - if self._exe: - self._cmd = [self.exe, ] - else: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), - 'No executor set in {0}'.format(self.__class__.__name__)) - for arg in self.exeArgs: - if arg in self.conf.argdict: - # If we have a list then add each element to our list, else just str() the argument value - # Note if there are arguments which need more complex transformations then - # consider introducing a special toExeArg() method. 
- if isinstance(self.conf.argdict[arg].value, list): - self._cmd.extend([ str(v) for v in self.conf.argdict[arg].value]) - else: - self._cmd.append(str(self.conf.argdict[arg].value)) - - - def execute(self): - self._hasExecuted = True - msg.info('Starting execution of {0} ({1})'.format(self._name, self._cmd)) - - self._exeStart = os.times() - if ('execOnly' in self.conf.argdict and self.conf.argdict['execOnly'] == True): - msg.info('execOnly flag is set - execution will now switch, replacing the transform') - os.execvp(self._cmd[0], self._cmd) - - if self._memoryMonitor: - lastStamp = time.time() - try: - import MemoryMonitor - except ImportError: - msg.warning("Failed to import MemoryMonitor - memory monitoring is disabled") - self._memoryMonitor = False - try: - p = subprocess.Popen(self._cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) - while p.poll() is None: - line = p.stdout.readline() - if line: - self._echologger.info(line.rstrip()) - if self._memoryMonitor and time.time() - lastStamp >= self._memoryMonitorInterval: - values = MemoryMonitor.GetMemoryValues(p.pid) - lastStamp = time.time() - print "Mem Monitor:", values - # Hoover up remaining buffered output lines - for line in p.stdout: - self._echologger.info(line.rstrip()) - - self._rc = p.returncode - msg.info('%s executor returns %d' % (self._name, self._rc)) - self._exeStop = os.times() - except OSError as e: - errMsg = 'Execution of {0} failed and raised OSError: {1}'.format(self._cmd[0], e) - msg.error(errMsg) - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC'), errMsg) - - - def postExecute(self): - if hasattr(self._exeLogFile, 'close'): - self._exeLogFile.close() - - - def validate(self): - self._hasValidated = True - - ## Check rc - if self._rc == 0: - msg.info('Executor {0} validated successfully (return code {1})'.format(self._name, self._rc)) - self._isValidated = True - self._errMsg = '' - else: - # Want to learn as much as possible from the non-zero code - # this is a bit hard in general, although one can do signals. 
- # Probably need to be more specific per exe, i.e., athena non-zero codes - self._isValidated = False - if self._rc < 0: - # Map return codes to what the shell gives (128 + SIGNUM) - self._rc = 128 - self._rc - if trfExit.codeToSignalname(self._rc) != "": - self._errMsg = '{0} got a {1} signal (exit code {2})'.format(self._name, trfExit.codeToSignalname(self._rc), self._rc) - else: - self._errMsg = 'Non-zero return code from %s (%d)' % (self._name, self._rc) - raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_FAIL'), self._errMsg) - - ## Check event counts (always do this by default) - # Do this here so that all script executors have this by default (covers most use cases with events) - if 'checkEventCount' in self.conf.argdict.keys() and self.conf.argdict['checkEventCount'].returnMyValue(exe=self) is False: - msg.info('Event counting for substep {0} is skipped'.format(self.name)) - else: - checkcount=trfValidation.eventMatch(self) - checkcount.decide() - msg.info('Event counting for substep {0} passed'.format(self.name)) - - - -class athenaExecutor(scriptExecutor): - _exitMessageLimit = 200 # Maximum error message length to report in the exitMsg - _defaultIgnorePatternFile = ['atlas_error_mask.db'] - - ## @brief Initialise athena executor - # @param name Executor name - # @param trf Parent transform - # @param skeletonFile athena skeleton job options file (optionally this can be a list of skeletons - # that will be given to athena.py in order); can be set to @c None to disable writing job options - # files at all - # @param exe Athena execution script - # @param exeArgs Transform argument names whose value is passed to athena - # @param substep The athena substep this executor represents (alias for the name) - # @param inputEventTest Boolean switching the skipEvents < inputEvents test - # @param perfMonFile Name of perfmon file for this substep (used to retrieve vmem/rss information) - # @param tryDropAndReload Boolean switch for the attempt to add '--drop-and-reload' to athena args - # @param extraRunargs Dictionary of extra runargs to write into the job options file, using repr - # @param runtimeRunargs Dictionary of extra runargs to write into the job options file, using str - # @param literalRunargs List of extra lines to write into the runargs file - # @param dataArgs List of datatypes that will always be given as part of this transform's runargs - # even if not actually processed by this substep (used, e.g., to set random seeds for some generators) - # @param checkEventCount Compare the correct number of events in the output file (either input file size or maxEvents) - # @param errorMaskFiles List of files to use for error masks in logfile scanning (@c None means not set for this - # executor, so use the transform or the standard setting) - # @param manualDataDictionary Instead of using the inData/outData parameters that binds the data types for this - # executor to the workflow graph, run the executor manually with these data parameters (useful for - # post-facto executors, e.g., for AthenaMP merging) - # @note The difference between @c extraRunargs, @runtimeRunargs and @literalRunargs is that: @c extraRunargs - # uses repr(), so the RHS is the same as the python object in the transform; @c runtimeRunargs uses str() so - # that a string can be interpreted at runtime; @c literalRunargs allows the direct insertion of arbitary python - # snippets into the runArgs file. 
- def __init__(self, name = 'athena', trf = None, conf = None, skeletonFile = 'PyJobTransforms/skeleton.dummy.py', inData = set(), - outData = set(), exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True, - perfMonFile = None, tryDropAndReload = True, extraRunargs = {}, runtimeRunargs = {}, - literalRunargs = [], dataArgs = [], checkEventCount = False, errorMaskFiles = None, - manualDataDictionary = None): - - self._substep = forceToAlphaNum(substep) - self._athenaMP = None # As yet unknown; N.B. this flag is used for AthenaMP version 2+. For AthenaMP-I it is set to False - self._inputEventTest = inputEventTest - self._perfMonFile = perfMonFile - self._tryDropAndReload = tryDropAndReload - self._extraRunargs = extraRunargs - self._runtimeRunargs = runtimeRunargs - self._literalRunargs = literalRunargs - self._dataArgs = dataArgs - self._errorMaskFiles = errorMaskFiles - - # SkeletonFile can be None (disable) or a string or a list of strings - normalise it here - if type(skeletonFile) is str: - self._skeleton = [skeletonFile] - else: - self._skeleton = skeletonFile - - super(athenaExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData, exe=exe, exeArgs=exeArgs) - - # Add athena specific metadata - self._extraMetadata.update({'substep': substep}) - - # Setup JO templates - if self._skeleton is not None: - self._jobOptionsTemplate = JobOptionsTemplate(exe = self, version = '$Id: trfExe.py 643045 2015-01-30 13:43:56Z graemes $') - else: - self._jobOptionsTemplate = None - - - - @property - def substep(self): - return self._substep - - def preExecute(self, input = set(), output = set()): - msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - # Try to detect AthenaMP mode - # The first flag indicates if the transform needs to handle the AthenaMP merging (i.e., AthenaMP v2) - # The first flag is set true in order to disable the --drop-and-reload option because AthenaMP v1 - # cannot handle it - self._athenaMP, self._athenaMPv1 = self._detectAthenaMP() - - # And if this is athenaMP, then set some options for workers and output file report - if self._athenaMP: - self._athenaMPWorkerTopDir = 'athenaMP-workers-{0}-{1}'.format(self._name, self._substep) - self._athenaMPFileReport = 'athenaMP-outputs-{0}-{1}'.format(self._name, self._substep) - # See if we have options for the target output file size - if 'athenaMPMergeTargetSize' in self.conf._argdict: - for dataType, targetSize in self.conf._argdict['athenaMPMergeTargetSize'].value.iteritems(): - if dataType in self.conf._dataDictionary: - self.conf._dataDictionary[dataType].mergeTargetSize = targetSize * 1000000 # Convert from MB to B - msg.info('Set target merge size for {0} to {1}'.format(dataType, self.conf._dataDictionary[dataType].mergeTargetSize)) - elif 'ALL' in self.conf._dataDictionary: - self.conf._dataDictionary['ALL'].mergeTargetSize = targetSize * 1000000 - msg.info('Set target merge size for {0} to {1} (from ALL value)'.format(dataType, self.conf._dataDictionary[dataType].mergeTargetSize)) - else: - self._athenaMPWorkerTopDir = self._athenaMPFileReport = None - - - # Check we actually have events to process! 
- if (self._inputEventTest and 'skipEvents' in self.conf.argdict and - self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) is not None): - msg.debug('Will test for events to process') - for dataType in input: - inputEvents = self.conf.dataDictionary[dataType].nentries - msg.debug('Got {0} events for {1}'.format(inputEvents, dataType)) - if not isinstance(inputEvents, (int, long)): - msg.warning('Are input events countable? Got nevents={0} so disabling event count check for this input'.format(inputEvents)) - elif self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) >= inputEvents: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_NOEVENTS'), - 'No events to process: {0} (skipEvents) >= {1} (inputEvents of {2}'.format(self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor), inputEvents, dataType)) - - ## Write the skeleton file and prep athena - if self._skeleton is not None: - inputFiles = dict() - for dataType in input: - inputFiles[dataType] = self.conf.dataDictionary[dataType] - outputFiles = dict() - for dataType in output: - outputFiles[dataType] = self.conf.dataDictionary[dataType] - - # See if we have any 'extra' file arguments - for dataType, dataArg in self.conf.dataDictionary.iteritems(): - if dataArg.io == 'input' and self._name in dataArg.executor: - inputFiles[dataArg.subtype] = dataArg - - msg.debug('Input Files: {0}; Output Files: {1}'.format(inputFiles, outputFiles)) - - # Get the list of top options files that will be passed to athena (=runargs file + all skeletons) - self._topOptionsFiles = self._jobOptionsTemplate.getTopOptions(input = inputFiles, - output = outputFiles) - - ## Add input/output file information - this can't be done in __init__ as we don't know what our - # inputs and outputs will be then - if len(input) > 0: - self._extraMetadata['inputs'] = list(input) - if len(output) > 0: - self._extraMetadata['outputs'] = list(output) - - ## Do we need to run asetup first? - asetupString = None - if 'asetup' in self.conf.argdict: - asetupString = self.conf.argdict['asetup'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) - else: - msg.info('Asetup report: {0}'.format(asetupReport())) - - ## DBRelease configuration - dbrelease = dbsetup = None - if 'DBRelease' in self.conf.argdict: - dbrelease = self.conf.argdict['DBRelease'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) - if dbrelease: - # Classic tarball - filename format is DBRelease-X.Y.Z.tar.gz - dbdMatch = re.match(r'DBRelease-([\d\.]+)\.tar\.gz', path.basename(dbrelease)) - if dbdMatch: - msg.debug('DBRelease setting {0} matches classic tarball file'.format(dbrelease)) - if not os.access(dbrelease, os.R_OK): - msg.warning('Transform was given tarball DBRelease file {0}, but this is not there'.format(dbrelease)) - msg.warning('I will now try to find DBRelease {0} in cvmfs'.format(dbdMatch.group(1))) - dbrelease = dbdMatch.group(1) - dbsetup = cvmfsDBReleaseCheck(dbrelease) - else: - # Check if the DBRelease is setup - unpacked, dbsetup = unpackDBRelease(tarball=dbrelease, dbversion=dbdMatch.group(1)) - if unpacked: - # Now run the setup.py script to customise the paths to the current location... 
- setupDBRelease(dbsetup) - # For cvmfs we want just the X.Y.Z release string (and also support 'current') - else: - dbsetup = cvmfsDBReleaseCheck(dbrelease) - - ## Look for environment updates and perpare the athena command line - self._envUpdate = trfEnv.environmentUpdate() - self._envUpdate.setStandardEnvironment(self.conf.argdict, name=self.name, substep=self.substep) - self._prepAthenaCommandLine() - - - super(athenaExecutor, self).preExecute(input, output) - - # Now we always write a wrapper, because it's very convenient for re-running individual substeps - # This will have asetup and/or DB release setups in it - # Do this last in this preExecute as the _cmd needs to be finalised - msg.info('Now writing wrapper for substep executor {0}'.format(self._name)) - self._writeAthenaWrapper(asetup=asetupString, dbsetup=dbsetup) - msg.info('Athena will be executed in a subshell via {0}'.format(self._cmd)) - - - def postExecute(self): - super(athenaExecutor, self).postExecute() - - # If this was an athenaMP run then we need to update output files - if self._athenaMP: - if path.exists(self._athenaMPFileReport): - try: - try: - outputFileArgs = [ self.conf.dataDictionary[dataType] for dataType in self._output ] - except KeyError, e: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC'), - 'Failed to find output file argument instances for outputs {0} in {1}'.format(self.outData, self.name)) - mpOutputs = ElementTree.ElementTree() - mpOutputs.parse(self._athenaMPFileReport) - for filesElement in mpOutputs.getroot().getiterator(tag='Files'): - msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib)) - originalArg = None - originalName = filesElement.attrib['OriginalName'] - for fileArg in outputFileArgs: - if fileArg.value[0] == originalName: - originalArg = fileArg - break - if originalArg is None: - msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(originalName)) - continue - msg.debug('Found matching argument {0}'.format(originalArg)) - fileNameList = [] - for fileElement in filesElement.getiterator(tag='File'): - msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib)) - fileNameList.append(fileElement.attrib['name']) - # Now update argument with the new name list and reset metadata - originalArg.multipleOK = True - originalArg.value = fileNameList - originalArg.originalName = originalName - msg.debug('Argument {0} value now {1}'.format(originalArg, originalArg.value)) - # Merge? 
- if originalArg.io is 'output' and len(originalArg.value) > 1: - msg.debug('{0} files {1} are candidates for smart merging'.format(originalArg.name, originalArg.value)) - self._smartMerge(originalArg) - except Exception, e: - msg.error('Exception thrown when processing athenaMP outputs report {0}: {1}'.format(self._athenaMPFileReport, e)) - msg.error('Validation is now very likely to fail') - raise - else: - msg.warning('AthenaMP run was set to True, but no outputs file was found') - - # If we have a perfmon file, get memory information - if self._perfMonFile: - try: - import PerfMonComps.PMonSD - info = PerfMonComps.PMonSD.parse(self._perfMonFile) - vmem_peak = int(info[0]['special']['values']['vmem_peak']) - vmem_mean = int(info[0]['special']['values']['vmem_mean']) - rss_mean = int(info[0]['special']['values']['rss_mean']) - self._memStats = {'vmemPeak': vmem_peak, 'vmemMean': vmem_mean, 'rssMean': rss_mean} - msg.debug('Found these memory stats from {0}: {1}'.format(self._perfMonFile, self._memStats)) - except Exception, e: - msg.info('Failed to process expected perfMon stats file {0}: {1}'.format(self._perfMonFile, e)) - - if 'TXT_JIVEXMLTGZ' in self.conf.dataDictionary.keys(): - #tgzipping JiveXML files - targetTGZName = self.conf.dataDictionary['TXT_JIVEXMLTGZ'].value[0] - if os.path.exists(targetTGZName): - os.remove(targetTGZName) - - import tarfile - fNameRE = re.compile("JiveXML\_\d+\_\d+.xml") - - # force gz compression - tar = tarfile.open(targetTGZName, "w:gz") - for fName in os.listdir('.'): - matches = fNameRE.findall(fName) - if len(matches) > 0: - if fNameRE.findall(fName)[0] == fName: - msg.info('adding %s to %s' % (fName, targetTGZName)) - tar.add(fName) - - tar.close() - msg.info('JiveXML compression: %s has been written and closed.' 
% (targetTGZName)) - - def validate(self): - self._hasValidated = True - deferredException = None - - ## Our parent will check the RC for us - try: - super(athenaExecutor, self).validate() - except trfExceptions.TransformValidationException, e: - # In this case we hold this exception until the logfile has been scanned - msg.error('Validation of return code failed: {0!s}'.format(e)) - deferredException = e - - # Logfile scan setup - # Always use ignorePatterns from the command line - # For patterns in files, pefer the command line first, then any special settings for - # this executor, then fallback to the standard default (atlas_error_mask.db) - if 'ignorePatterns' in self.conf.argdict: - igPat = self.conf.argdict['ignorePatterns'].value - else: - igPat = [] - if 'ignoreFiles' in self.conf.argdict: - ignorePatterns = trfValidation.ignorePatterns(files = self.conf.argdict['ignoreFiles'].value, extraSearch=igPat) - elif self._errorMaskFiles is not None: - ignorePatterns = trfValidation.ignorePatterns(files = self._errorMaskFiles, extraSearch=igPat) - else: - ignorePatterns = trfValidation.ignorePatterns(files = athenaExecutor._defaultIgnorePatternFile, extraSearch=igPat) - - # Now actually scan my logfile - msg.info('Scanning logfile {0} for errors'.format(self._logFileName)) - self._logScan = trfValidation.athenaLogFileReport(logfile = self._logFileName, ignoreList = ignorePatterns) - worstError = self._logScan.worstError() - - # In general we add the error message to the exit message, but if it's too long then don't do - # that and just say look in the jobReport - if worstError['firstError']: - if len(worstError['firstError']['message']) > athenaExecutor._exitMessageLimit: - if 'CoreDumpSvc' in worstError['firstError']['message']: - exitErrorMessage = "Core dump at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine']) - elif 'G4Exception' in worstError['firstError']['message']: - exitErrorMessage = "G4 exception at line {0} (see jobReport for further details)".format(worstError['firstError']['firstLine']) - else: - exitErrorMessage = "Long {0} message at line {1} (see jobReport for further details)".format(worstError['level'], worstError['firstError']['firstLine']) - else: - exitErrorMessage = "Logfile error in {0}: \"{1}\"".format(self._logFileName, worstError['firstError']['message']) - else: - exitErrorMessage = "Error level {0} found (see athena logfile for details)".format(worstError['level']) - - # If we failed on the rc, then abort now - if deferredException is not None: - # Add any logfile information we have - if worstError['nLevel'] >= stdLogLevels['ERROR']: - deferredException.errMsg = deferredException.errMsg + "; {0}".format(exitErrorMessage) - raise deferredException - - - # Very simple: if we get ERROR or worse, we're dead, except if ignoreErrors=True - if worstError['nLevel'] == stdLogLevels['ERROR'] and ('ignoreErrors' in self.conf.argdict and self.conf.argdict['ignoreErrors'].value is True): - msg.warning('Found ERRORs in the logfile, but ignoring this as ignoreErrors=True (see jobReport for details)') - elif worstError['nLevel'] >= stdLogLevels['ERROR']: - self._isValidated = False - msg.error('Fatal error in athena logfile (level {0})'.format(worstError['level'])) - raise trfExceptions.TransformLogfileErrorException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), - 'Fatal error in athena logfile: "{0}"'.format(exitErrorMessage)) - - # Must be ok if we got here! 
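
# Aside: a compact, standalone restatement of the pass/fail decision applied above.
# The numeric stdLogLevels values here are assumed for the example (only the ordering matters);
# the real mapping comes from trfLogger.
stdLogLevels = {'WARNING': 30, 'ERROR': 40, 'FATAL': 50}

def logScanVerdict(worstError, ignoreErrors=False):
    if worstError['nLevel'] == stdLogLevels['ERROR'] and ignoreErrors:
        return 'validated (ERRORs ignored)'
    if worstError['nLevel'] >= stdLogLevels['ERROR']:
        return 'failed (TRF_EXEC_LOGERROR)'
    return 'validated'

print(logScanVerdict({'nLevel': 40}, ignoreErrors=True))  # validated (ERRORs ignored)
print(logScanVerdict({'nLevel': 50}, ignoreErrors=True))  # failed (TRF_EXEC_LOGERROR)
print(logScanVerdict({'nLevel': 30}))                     # validated
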
- msg.info('Executor {0} has validated successfully'.format(self.name)) - self._isValidated = True - - - ## @brief Detect if AthenaMP is being used for this execution step - # @details Check environment and athena options - # Note that the special config option @c disableMP is used as an override - # so that we do not utilise AthenaMP for smart merging - # @return Tuple of two booleans: first is true if AthenaMPv2 is enabled, second is true - # if AthenaMPv1 is enabled - def _detectAthenaMP(self): - if self.conf._disableMP: - msg.debug('Executor configuration specified disabling AthenaMP') - return False, False - - try: - # First try and detect if any AthenaMP has been enabled - if 'ATHENA_PROC_NUMBER' in os.environ and (int(os.environ['ATHENA_PROC_NUMBER']) is not 0): - msg.info('Detected non-zero ATHENA_PROC_NUMBER ({0}) - setting athenaMP=True flag'.format(os.environ['ATHENA_PROC_NUMBER'])) - athenaMPEnabled = True - elif 'athenaopts' in self.conf.argdict and len([opt for opt in self.conf.argdict['athenaopts'].value if '--nprocs' in opt]) > 0: - msg.info('Detected --nprocs argument for athena - setting athenaMP=True flag') - athenaMPEnabled = True - else: - athenaMPEnabled = False - - # If AthenaMP has not been enabled, we don't care about the version - if not athenaMPEnabled: - msg.info('No AthenaMP options found - assuming normal athena run') - return False, False - - # Now need to see if we're running with AthenaMP v1 or v2. In v1 AthenaMP - # handles all special merging and setup, so we ignore it. In v2 the - # transform takes an active role in smart merging and job setup. - # We signal AthenaMPv1 by returning False, True; v2 by True, False - from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps - if 'Version' in dir(AthenaMPJobProps.AthenaMPFlags): - if AthenaMPJobProps.AthenaMPFlags.Version == 1: - msg.info("AthenaMP properties indicates version 1 - no special AthenaMP processing will be done") - return False, True - elif releaseIsOlderThan(17, 7): - msg.info("Release is older than 17.7, so assuming AthenaMP version 1 - no special AthenaMP processing will be done") - return False, True - return True, False - - except ValueError: - msg.error('Could not understand ATHENA_PROC_NUMBER environment variable (int conversion failed)') - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Invalid ATHENA_PROC_NUMBER environment variable') - - - ## @brief Prepare the correct command line to be used to invoke athena - def _prepAthenaCommandLine(self): - ## Start building up the command line - # N.B. it's possible we might have cases where 'athena' and 'athenaopt' should be substep args - # but at the moment this hasn't been requested. 
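
# Aside: a minimal sketch of the AthenaMP trigger checks used by _detectAthenaMP above
# (environment variable plus a --nprocs entry in athenaopts); illustrative only, not the
# transform's own API.
import os

def athenaMPRequested(athenaopts=None):
    try:
        if int(os.environ.get('ATHENA_PROC_NUMBER', '0')) != 0:
            return True
    except ValueError:
        raise RuntimeError('Invalid ATHENA_PROC_NUMBER environment variable')
    return bool(athenaopts) and any('--nprocs' in opt for opt in athenaopts)

print(athenaMPRequested(['--nprocs=4']))       # True
print(athenaMPRequested(['--loglevel=INFO']))  # False unless ATHENA_PROC_NUMBER is set
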
- if 'athena' in self.conf.argdict: - self._exe = self.conf.argdict['athena'].value - self._cmd = [self._exe] - - # See if there's a preloadlibs and a request to update LD_PRELOAD for athena - if 'LD_PRELOAD' in self._envUpdate._envdict: - preLoadUpdated = False - if 'athenaopts' in self.conf.argdict: - for athArg in self.conf.argdict['athenaopts'].value: - # This code is pretty ugly as the athenaopts argument contains - # strings which are really key/value pairs - if athArg.startswith('--preloadlib'): - try: - i = self.conf.argdict['athenaopts'].value.index(athArg) - v = athArg.split('=', 1)[1] - msg.info('Updating athena --preloadlib option with: {0}'.format(self._envUpdate.value('LD_PRELOAD'))) - newPreloads = ":".join(set(v.split(":")) | set(self._envUpdate.value('LD_PRELOAD').split(":"))) - self.conf.argdict['athenaopts']._value[i] = '--preloadlib={0}'.format(newPreloads) - except Exception, e: - msg.warning('Failed to interpret athena option: {0} ({1})'.format(athArg, e)) - preLoadUpdated = True - break - if not preLoadUpdated: - msg.info('Setting athena preloadlibs to: {0}'.format(self._envUpdate.value('LD_PRELOAD'))) - if 'athenaopts' in self.conf.argdict: - self.conf.argdict['athenaopts'].append("--preloadlib={0}".format(self._envUpdate.value('LD_PRELOAD'))) - else: - self.conf.argdict['athenaopts'] = trfArgClasses.argList(["--preloadlib={0}".format(self._envUpdate.value('LD_PRELOAD'))]) - - # Now update command line with the options we have (including any changes to preload) - if 'athenaopts' in self.conf.argdict: - self._cmd.extend(self.conf.argdict['athenaopts'].value) - - ## Add --drop-and-reload if possible (and allowed!) - if self._tryDropAndReload: - if self._athenaMPv1: - msg.info('Disabling "--drop-and-reload" because the job is configured to use AthenaMP v1') - elif 'valgrind' in self.conf._argdict and self.conf._argdict['valgrind'].value is True: - msg.info('Disabling "--drop-and-reload" because the job is configured to use Valgrind') - elif 'athenaopts' in self.conf.argdict: - athenaConfigRelatedOpts = ['--config-only','--drop-and-reload','--drop-configuration','--keep-configuration'] - # Note for athena options we split on '=' so that we properly get the option and not the whole "--option=value" string - conflictOpts = set(athenaConfigRelatedOpts).intersection(set([opt.split('=')[0] for opt in self.conf.argdict['athenaopts'].value])) - if len(conflictOpts) > 0: - msg.info('Not appending "--drop-and-reload" to athena command line because these options conflict: {0}'.format(list(conflictOpts))) - else: - msg.info('Appending "--drop-and-reload" to athena options') - self._cmd.append('--drop-and-reload') - else: - # This is the 'standard' case - so drop and reload should be ok - msg.info('Appending "--drop-and-reload" to athena options') - self._cmd.append('--drop-and-reload') - else: - msg.info('Skipping test for "--drop-and-reload" in this executor') - - # Add topoptions - if self._skeleton is not None: - self._cmd += self._topOptionsFiles - msg.info('Updated script arguments with topoptions: %s' % self._cmd) - - - ## @brief Write a wrapper script which runs asetup and then Athena. 
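
# Aside: a condensed sketch of the kind of wrapper the method below produces. The asetup
# argument, the skeleton/runargs file names and the use of a shell variable for the AtlasSetup
# directory are all made up for illustration; the real method also handles DBRelease, Valgrind
# and environment updates.
wrapperLines = [
    '#! /bin/sh',
    '# asetup',
    'source $AtlasSetup/scripts/asetup.sh 20.1.X.Y,here',
    'if [ ${?} != "0" ]; then exit 255; fi',
    'athena.py runargs.myStep.py skeleton.myStep.py',
]
print('\n'.join(wrapperLines))
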
- def _writeAthenaWrapper( - self, - asetup = None, - dbsetup = None - ): - self._originalCmd = self._cmd - self._asetup = asetup - self._dbsetup = dbsetup - self._wrapperFile = 'runwrapper.{name}.sh'.format(name = self._name) - msg.debug( - 'Preparing wrapper file {wrapperFileName} with ' + - 'asetup={asetupStatus} and dbsetup={dbsetupStatus}'.format( - wrapperFileName = self._wrapperFile, - asetupStatus = self._asetup, - dbsetupStatus = self._dbsetup - ) - ) - try: - with open(self._wrapperFile, 'w') as wrapper: - print >>wrapper, '#! /bin/sh' - if asetup: - print >>wrapper, "# asetup" - print >>wrapper, 'echo Sourcing {AtlasSetupDirectory}/scripts/asetup.sh {asetupStatus}'.format( - AtlasSetupDirectory = os.environ['AtlasSetup'], - asetupStatus = asetup - ) - print >>wrapper, 'source {AtlasSetupDirectory}/scripts/asetup.sh {asetupStatus}'.format( - AtlasSetupDirectory = os.environ['AtlasSetup'], - asetupStatus = asetup - ) - print >>wrapper, 'if [ ${?} != "0" ]; then exit 255; fi' - if dbsetup: - dbroot = path.dirname(dbsetup) - dbversion = path.basename(dbroot) - print >>wrapper, "# DBRelease setup" - print >>wrapper, 'echo Setting up DBRelease {dbroot} environment'.format(dbroot = dbroot) - print >>wrapper, 'export DBRELEASE={dbversion}'.format(dbversion = dbversion) - print >>wrapper, 'export CORAL_AUTH_PATH={directory}'.format(directory = path.join(dbroot, 'XMLConfig')) - print >>wrapper, 'export CORAL_DBLOOKUP_PATH={directory}'.format(directory = path.join(dbroot, 'XMLConfig')) - print >>wrapper, 'export TNS_ADMIN={directory}'.format(directory = path.join(dbroot, 'oracle-admin')) - print >>wrapper, 'DATAPATH={dbroot}:$DATAPATH'.format(dbroot = dbroot) - if self.conf._disableMP: - print >>wrapper, "# AthenaMP explicitly disabled for this executor" - print >>wrapper, "unset ATHENA_PROC_NUMBER" - if self._envUpdate.len > 0: - print >>wrapper, "# Customised environment" - for envSetting in self._envUpdate.values: - if not envSetting.startswith('LD_PRELOAD'): - print >>wrapper, "export", envSetting - # If Valgrind is engaged, a serialised Athena configuration file - # is generated for use with a subsequent run of Athena with - # Valgrind. - if 'valgrind' in self.conf._argdict and self.conf._argdict['valgrind'].value is True: - msg.info('Valgrind engaged') - # Define the file name of the serialised Athena - # configuration. - AthenaSerialisedConfigurationFile = "{name}Conf.pkl".format( - name = self._name - ) - # Run Athena for generation of its serialised configuration. - print >>wrapper, ' '.join(self._cmd), "--config-only={0}".format(AthenaSerialisedConfigurationFile) - print >>wrapper, 'if [ $? != "0" ]; then exit 255; fi' - # Generate a Valgrind command, using default or basic - # options as requested and extra options as requested. 
- if 'valgrindbasicopts' in self.conf._argdict: - basicOptionsList = self.conf._argdict['valgrindbasicopts'].value - else: - basicOptionsList = None - if 'valgrindextraopts' in self.conf._argdict: - extraOptionsList = self.conf._argdict['valgrindextraopts'].value - else: - extraOptionsList = None - msg.debug("requested Valgrind command basic options: {options}".format(options = basicOptionsList)) - msg.debug("requested Valgrind command extra options: {options}".format(options = extraOptionsList)) - command = ValgrindCommand( - basicOptionsList = basicOptionsList, - extraOptionsList = extraOptionsList, - AthenaSerialisedConfigurationFile = \ - AthenaSerialisedConfigurationFile - ) - msg.debug("Valgrind command: {command}".format(command = command)) - print >>wrapper, command - else: - msg.info('Valgrind not engaged') - # run Athena command - print >>wrapper, ' '.join(self._cmd) - os.chmod(self._wrapperFile, 0755) - except (IOError, OSError) as e: - errMsg = 'error writing athena wrapper {fileName}: {error}'.format( - fileName = self._wrapperFile, - error = e - ) - msg.error(errMsg) - raise trfExceptions.TransformExecutionException( - trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), - errMsg - ) - self._cmd = [path.join('.', self._wrapperFile)] - - - ## @brief Manage smart merging of output files - # @param fileArg File argument to merge - def _smartMerge(self, fileArg): - ## @note Produce a list of merge jobs - this is a list of lists - # @todo This should be configurable! - # @note Value is set very low for now for testing - - ## @note only file arguments which support selfMerge() can be merged - if 'selfMerge' not in dir(fileArg): - msg.info('Files in {0} cannot merged (no selfMerge() method is implemented)'.format(fileArg.name)) - return - - if fileArg.mergeTargetSize == 0: - msg.info('Files in {0} will not be merged as target size is set to 0)'.format(fileArg.name)) - return - - - mergeCandidates = [list()] - currentMergeSize = 0 - for fname in fileArg.value: - size = fileArg.getSingleMetadata(fname, 'file_size') - if type(size) not in (int, long): - msg.warning('File size metadata for {0} was not correct, found type {1}. Aborting merge attempts.'.format(fileArg, type(size))) - return - # if there is no file in the job, then we must add it - if len(mergeCandidates[-1]) == 0: - msg.debug('Adding file {0} to current empty merge list'.format(fname)) - mergeCandidates[-1].append(fname) - currentMergeSize += size - continue - # see if adding this file gets us closer to the target size (but always add if target size is negative) - if fileArg.mergeTargetSize < 0 or math.fabs(currentMergeSize + size - fileArg.mergeTargetSize) < math.fabs(currentMergeSize - fileArg.mergeTargetSize): - msg.debug('Adding file {0} to merge list {1} as it gets closer to the target size'.format(fname, mergeCandidates[-1])) - mergeCandidates[-1].append(fname) - currentMergeSize += size - continue - # close this merge list and start a new one - msg.debug('Starting a new merge list with file {0}'.format(fname)) - mergeCandidates.append([fname]) - currentMergeSize = size - - msg.debug('First pass splitting will merge files in this way: {0}'.format(mergeCandidates)) - - counter = 0 - for mergeGroup in mergeCandidates: - counter += 1 - # If we only have one merge group, then preserve the original name (important for - # prodsys v1). Otherwise we use the new merged names. 
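
# Aside: a standalone sketch of the greedy grouping pass above - a file joins the current
# group while that keeps the group's total closer to the target size, otherwise a new group
# is started (file names, sizes and the target are made-up numbers; a negative target means
# "merge everything").
import math

def groupBySize(fileSizes, targetSize):
    groups, currentSize = [[]], 0
    for fname, size in fileSizes:
        if not groups[-1]:
            groups[-1].append(fname)
            currentSize = size
        elif targetSize < 0 or math.fabs(currentSize + size - targetSize) < math.fabs(currentSize - targetSize):
            groups[-1].append(fname)
            currentSize += size
        else:
            groups.append([fname])
            currentSize = size
    return groups

files = [('f1', 400), ('f2', 400), ('f3', 400), ('f4', 900)]
print(groupBySize(files, 1000))  # [['f1', 'f2'], ['f3', 'f4']]
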
- if len(mergeCandidates) == 1: - mergeName = fileArg.originalName - else: - mergeName = fileArg.originalName + '.merge.{0}'.format(counter) - msg.info('Want to merge files {0} to {1}'.format(mergeGroup, mergeName)) - if len(mergeGroup) <= 1: - msg.info('Skip merging for single file') - else: - ## We want to parallelise this part! - fileArg.selfMerge(output=mergeName, inputs=mergeGroup, argdict=self.conf.argdict) - -## @brief Athena executor where failure is not consisered fatal -class optionalAthenaExecutor(athenaExecutor): - - # Here we validate, but will suppress any errors - def validate(self): - try: - super(optionalAthenaExecutor, self).validate() - except trfExceptions.TransformValidationException, e: - # In this case we hold this exception until the logfile has been scanned - msg.warning('Validation failed for {0}: {1}'.format(self._name, e)) - self._isValidated = False - self._errMsg = e.errMsg - self._rc = e.errCode - - -class hybridPOOLMergeExecutor(athenaExecutor): - ## @brief Initialise hybrid POOL merger athena executor - # @param name Executor name - # @param trf Parent transform - # @param skeletonFile athena skeleton job options file - # @param exe Athena execution script - # @param exeArgs Transform argument names whose value is passed to athena - # @param substep The athena substep this executor represents - # @param inputEventTest Boolean switching the skipEvents < inputEvents test - # @param perfMonFile Name of perfmon file for this substep (used to retrieve vmem/rss information) - # @param tryDropAndReload Boolean switch for the attempt to add '--drop-and-reload' to athena args - # @param hybridMerge Boolean activating hybrid merger (if set to 'None' then the hybridMerge will - # be used if n_inputs <= 16, otherwise a classic merge will happen for better downstream i/o - # performance) - def __init__(self, name = 'hybridPOOLMerge', trf = None, conf = None, skeletonFile = 'RecJobTransforms/skeleton.MergePool_tf.py', inData = set(), - outData = set(), exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True, - perfMonFile = None, tryDropAndReload = True, hybridMerge = None, extraRunargs = {}, - manualDataDictionary = None): - - # By default we will do a hybridMerge - self._hybridMerge = hybridMerge - self._hybridMergeTmpFile = 'events.pool.root' - super(hybridPOOLMergeExecutor, self).__init__(name, trf=trf, conf=conf, skeletonFile=skeletonFile, inData=inData, - outData=outData, exe=exe, exeArgs=exeArgs, substep=substep, - inputEventTest=inputEventTest, perfMonFile=perfMonFile, - tryDropAndReload=tryDropAndReload, extraRunargs=extraRunargs, - manualDataDictionary=manualDataDictionary) - - def preExecute(self, input = set(), output = set()): - # Now check to see if the fastPoolMerger option was set - if 'fastPoolMerge' in self.conf.argdict: - msg.info('Setting hybrid merge to {0}'.format(self.conf.argdict['fastPoolMerge'].value)) - self._hybridMerge = self.conf.argdict['fastPoolMerge'].value - else: - # Hybrid merging really needs some proper validation, so only use - # it if specifically requested - msg.info("Automatic hybrid merging is disabled use the '--fastPoolMerge' flag if you want to switch it on") - self._hybridMerge = False - - if self._hybridMerge: - # If hybridMerge is activated then we process no events at the athena step, - # so set a ridiculous skipEvents value - msg.info("Setting skipEvents=1000000 to skip event processing during athena metadata merge") - self._extraRunargs.update({'skipEvents': 1000000}) - - 
super(hybridPOOLMergeExecutor, self).preExecute(input=input, output=output) - - - def execute(self): - # First call the parent executor, which will manage the athena execution for us - super(hybridPOOLMergeExecutor, self).execute() - - # Now, do we need to do the fast event merge? - if not self._hybridMerge: - return - - # Save the stub file for debugging... - stubFile = self.conf.dataDictionary[list(self._output)[0]].value[0] - stubFileSave = stubFile + ".tmp" - msg.info('Saving metadata stub file {0} to {1}'.format(stubFile, stubFileSave)) - shutil.copy(stubFile, stubFileSave) - - # Now do the hybrid merge steps - note we disable checkEventCount for this - it doesn't make sense here - fastConf = copy.copy(self.conf) - fastConf.addToArgdict('checkEventCount', trfArgClasses.argSubstepBool("all:False", runarg=False)) - fastEventMerge1 = scriptExecutor(name='fastEventMerge_step1', conf=fastConf, inData=self._inData, outData=self._outData, - exe='mergePOOL.exe', exeArgs=None) - fastEventMerge1._cmd = ['mergePOOL.exe', '-o', self._hybridMergeTmpFile] - for fname in self.conf.dataDictionary[list(self._input)[0]].value: - fastEventMerge1._cmd.extend(['-i', fname]) - fastEventMerge1._cmd.extend(['-e', 'MetaData', '-e', 'MetaDataHdrDataHeaderForm', '-e', 'MetaDataHdrDataHeader', '-e', 'MetaDataHdr']) - - msg.debug('Constructed this command line for fast event merge step 1: {0}'.format(fastEventMerge1._cmd)) - fastEventMerge1.doAll() - - - fastEventMerge2 = scriptExecutor(name='fastEventMerge_step2', conf=fastConf, inData=self._inData, outData=self._outData, - exe='mergePOOL.exe', exeArgs=None) - fastEventMerge2._cmd = ['mergePOOL.exe', '-o', self._hybridMergeTmpFile] - fastEventMerge2._cmd.extend(['-i', self.conf.dataDictionary[list(self._output)[0]].value[0]]) - - msg.debug('Constructed this command line for fast event merge step 2: {0}'.format(fastEventMerge2._cmd)) - fastEventMerge2.doAll() - - # Ensure we count all the mergePOOL.exe stuff in the resource report - self._exeStop = os.times() - - # And finally... - msg.info('Renaming {0} to {1}'.format(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0])) - try: - os.rename(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0]) - self.conf.dataDictionary[list(self._output)[0]]._resetMetadata() - # Stupid PoolFileCatalog now has the wrong GUID for the output file. Delete it for safety. - if os.access('PoolFileCatalog.xml', os.R_OK): - os.unlink('PoolFileCatalog.xml') - except (IOError, OSError) as e: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), - 'Exception raised when renaming {0} to {1}: {2}'.format(self._hybridMergeTmpFile, self.conf.dataDictionary[list(self._output)[0]].value[0], e)) - - -## @brief Specialist executor to manage the handling of multiple implicit input -# and output files within the reduction framework. -class reductionFrameworkExecutor(athenaExecutor): - - ## @brief Take inputDAODFile and setup the actual outputs needed - # in this job. 
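
# Aside: a minimal illustration of the output-name expansion performed in the method below -
# each requested reduction becomes a DAOD_<reduction> data type whose file name is prefixed
# onto the base output name (the reduction names and base file name here are made up).
reductionConf = ['TEST1', 'TEST2']    # hypothetical reduction names
outputDAODFile = 'DAOD.pool.root'     # hypothetical base output file name
for reduction in reductionConf:
    dataType = 'DAOD_' + reduction
    outputName = dataType + '.' + outputDAODFile
    print('{0} -> {1}'.format(dataType, outputName))
# DAOD_TEST1 -> DAOD_TEST1.DAOD.pool.root
# DAOD_TEST2 -> DAOD_TEST2.DAOD.pool.root
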
- def preExecute(self, input=set(), output=set()): - msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - if 'reductionConf' not in self.conf.argdict: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'), - 'No reduction configuration specified') - if 'DAOD' not in output: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'), - 'No base name for DAOD reduction') - - for reduction in self.conf.argdict['reductionConf'].value: - dataType = 'DAOD_' + reduction - outputName = 'DAOD_' + reduction + '.' + self.conf.argdict['outputDAODFile'].value[0] - msg.info('Adding reduction output type {0}'.format(dataType)) - output.add(dataType) - newReduction = trfArgClasses.argPOOLFile(outputName, io='output', runarg=True, type='aod', - name=reduction) - # References to _trf - can this be removed? - self.conf.dataDictionary[dataType] = newReduction - - # Clean up the stub file from the executor input and the transform's data dictionary - # (we don't remove the actual argFile instance) - output.remove('DAOD') - del self.conf.dataDictionary['DAOD'] - del self.conf.argdict['outputDAODFile'] - - msg.info('Data dictionary is now: {0}'.format(self.conf.dataDictionary)) - msg.info('Input/Output: {0}/{1}'.format(input, output)) - - super(reductionFrameworkExecutor, self).preExecute(input, output) - - -## @brief Specialist executor to manage the handling of multiple implicit input -# and output files within the reduction framework. -# @note This is the temporary executor used for NTUP->DNTUP. It will be dropped -# after the move to D(x)AOD. -class reductionFrameworkExecutorNTUP(athenaExecutor): - - ## @brief Take inputDNTUPFile and setup the actual outputs needed - # in this job. 
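
# Aside: a small illustration of the two DNTUP naming schemes handled in the method below -
# if the base file name contains 'DNTUP' it is substituted with the reduction data type,
# otherwise an old-style suffix is appended (reduction and file names here are made up).
def dntupOutputName(baseName, reduction):
    dataType = 'DNTUP_' + reduction
    outputName = baseName.replace('DNTUP', dataType)
    if outputName == baseName:
        # Old naming scheme: no 'DNTUP' in the base name, so append a suffix
        outputName = baseName + '_' + reduction + '.root'
    return outputName

print(dntupOutputName('user.me.DNTUP.01.root', 'TEST'))  # user.me.DNTUP_TEST.01.root
print(dntupOutputName('myntuple.root', 'TEST'))          # myntuple.root_TEST.root
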
- def preExecute(self, input=set(), output=set()): - msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - if 'reductionConf' not in self.conf.argdict: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'), - 'No reduction configuration specified') - if 'DNTUP' not in output: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_REDUCTION_CONFIG_ERROR'), - 'No base name for DNTUP reduction') - - for reduction in self.conf.argdict['reductionConf'].value: - dataType = 'DNTUP_' + reduction - # Prodsys 1 request - don't add a suffix, but replace DNTUP with DNTUP_TYPE - outputName = self.conf.argdict['outputDNTUPFile'].value[0].replace('DNTUP', dataType) - if outputName == self.conf.argdict['outputDNTUPFile'].value[0]: - # Rename according to the old scheme - outputName = self.conf.argdict['outputDNTUPFile'].value[0] + '_' + reduction + '.root' - msg.info('Adding reduction output type {0}, target filename {1}'.format(dataType, outputName)) - output.add(dataType) - newReduction = trfArgClasses.argNTUPFile(outputName, io='output', runarg=True, type='NTUP', subtype=dataType, - name=reduction, treeNames=['physics']) - self.conf.dataDictionary[dataType] = newReduction - - # Clean up the stub file from the executor input and the transform's data dictionary - # (we don't remove the actual argFile instance) - output.remove('DNTUP') - del self.conf.dataDictionary['DNTUP'] - del self.conf.argdict['outputDNTUPFile'] - - msg.info('Data dictionary is now: {0}'.format(self.conf.dataDictionary)) - msg.info('Input/Output: {0}/{1}'.format(input, output)) - - super(reductionFrameworkExecutorNTUP, self).preExecute(input, output) - - -## @brief Specialist execution class for merging DQ histograms -class DQMergeExecutor(scriptExecutor): - def __init__(self, name='DQHistMerge', trf=None, conf=None, inData=set(['HIST_AOD', 'HIST_ESD']), outData=set(['HIST']), - exe='DQHistogramMerge.py', exeArgs = []): - - self._histMergeList = 'HISTMergeList.txt' - - super(DQMergeExecutor, self).__init__(name=name, trf=trf, conf=conf, inData=inData, outData=outData, exe=exe, exeArgs=exeArgs) - - - def preExecute(self, input = set(), output = set()): - msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - super(DQMergeExecutor, self).preExecute(input=input, output=output) - - # Write the list of files to be merged - with open(self._histMergeList, 'w') as DQMergeFile: - for dataType in input: - for fname in self.conf.dataDictionary[dataType].value: - self.conf.dataDictionary[dataType]._getNumberOfEvents([fname]) - print >>DQMergeFile, fname - - self._cmd.append(self._histMergeList) - - # Add the output file - if len(output) != 1: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), - 'One (and only one) output file must be given to {0} (got {1})'.format(self.name, len(output))) - outDataType = list(output)[0] - self._cmd.append(self.conf.dataDictionary[outDataType].value[0]) - - # Set the run_post_processing to False - self._cmd.append('False') - - -## @brief Specialist execution class for merging NTUPLE files -class NTUPMergeExecutor(scriptExecutor): - - def preExecute(self, input = set(), output = set()): - msg.debug('[NTUP] Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - - # Basic command, and allow overwrite of the output file - if self._exe is None: - 
self._exe = 'hadd' - self._cmd = [self._exe, "-f"] - - - # Add the output file - if len(output) != 1: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), - 'One (and only one) output file must be given to {0} (got {1})'.format(self.name, len(output))) - outDataType = list(output)[0] - self._cmd.append(self.conf.dataDictionary[outDataType].value[0]) - # Add to be merged to the cmd chain - for dataType in input: - self._cmd.extend(self.conf.dataDictionary[dataType].value) - - super(NTUPMergeExecutor, self).preExecute(input=input, output=output) - -## @brief Specalise the athena executor to deal with the BS merge oddity of excluding empty DRAWs -class bsMergeExecutor(scriptExecutor): - - def preExecute(self, input = set(), output = set()): - self._maskedFiles = [] - if 'BS' in self.conf.argdict and 'maskEmptyInputs' in self.conf.argdict and self.conf.argdict['maskEmptyInputs'].value is True: - eventfullFiles = [] - for fname in self.conf.dataDictionary['BS'].value: - nEvents = self.conf.dataDictionary['BS'].getSingleMetadata(fname, 'nentries') - msg.debug('Found {0} events in file {1}'.format(nEvents, fname)) - if isinstance(nEvents, int) and nEvents > 0: - eventfullFiles.append(fname) - self._maskedFiles = list(set(self.conf.dataDictionary['BS'].value) - set(eventfullFiles)) - if len(self._maskedFiles) > 0: - msg.info('The following input files are masked because they have 0 events: {0}'.format(' '.join(self._maskedFiles))) - if len(self.conf.dataDictionary['BS'].value) == 0: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), - 'All input files had zero events - aborting BS merge') - - # Write the list of input files to a text file, so that testMergedFiles can swallow it - self._mergeBSFileList = '{0}.list'.format(self._exe) - self._mergeBSLogfile = '{0}.out'.format(self._exe) - try: - with open(self._mergeBSFileList, 'w') as BSFileList: - for fname in self.conf.dataDictionary['BS'].value: - if fname not in self._maskedFiles: - print >>BSFileList, fname - except (IOError, OSError) as e: - errMsg = 'Got an error when writing list of BS files to {0}: {1}'.format(self._mergeBSFileList, e) - msg.error(errMsg) - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), errMsg) - - # Hope that we were given a correct filename... - self._outputFilename = self.conf.dataDictionary['BS_MRG'].value[0] - if self._outputFilename.endswith('._0001.data'): - self._doRename = False - self._outputFilename = self._outputFilename.split('._0001.data')[0] - elif self.conf.argdict['allowRename'].value == True: - # OK, non-fatal, we go for a renaming - msg.info('Output filename does not end in "._0001.data" will proceed, but be aware that the internal filename metadata will be wrong') - self._doRename = True - else: - # No rename allowed, so we are dead... 
- errmsg = 'Output filename for outputBS_MRGFile must end in "._0001.data" or infile metadata will be wrong' - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), errmsg) - - # Set the correct command for execution - self._cmd = [self._exe, self._mergeBSFileList, '0', self._outputFilename] - - super(bsMergeExecutor, self).preExecute(input=input, output=output) - - - def postExecute(self): - if self._doRename: - self._expectedOutput = self._outputFilename + '._0001.data' - msg.info('Renaming {0} to {1}'.format(self._expectedOutput, self.conf.dataDictionary['BS_MRG'].value[0])) - try: - os.rename(self._outputFilename + '._0001.data', self.conf.dataDictionary['BS_MRG'].value[0]) - except OSError, e: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), - 'Exception raised when renaming {0} to {1}: {2}'.format(self._outputFilename, self.conf.dataDictionary['BS_MRG'].value[0], e)) - super(bsMergeExecutor, self).postExecute() - - - -class tagMergeExecutor(scriptExecutor): - - def preExecute(self, input = set(), output = set()): - # Just need to write the customised CollAppend command line - self._cmd = [self._exe, '-src'] - for dataType in input: - for fname in self.conf.dataDictionary[dataType].value: - self._cmd.extend(['PFN:{0}'.format(fname), 'RootCollection']) - self._cmd.extend(['-dst', 'PFN:{0}'.format(self.conf.dataDictionary[list(output)[0]].value[0]), 'RootCollection', '-nevtcached', '5000']) - - # In AthenaMP jobs the output file can be created empty, which CollAppend does not like - # so remove it - if os.access(self.conf.dataDictionary[list(output)[0]].value[0], os.F_OK): - os.unlink(self.conf.dataDictionary[list(output)[0]].value[0]) - - super(tagMergeExecutor, self).preExecute(input=input, output=output) - - - def validate(self): - super(tagMergeExecutor, self).validate() - - # Now scan the logfile... 
- try: - msg.debug('Scanning TAG merging logfile {0}'.format(self._logFileName)) - with open(self._logFileName) as logfile: - for line in logfile: - # Errors are signaled by 'error' (case independent) and NOT ('does not exist' or 'hlterror') - # Logic copied from Tier 0 TAGMerge_trf.py - if 'error' in line.lower(): - if 'does not exist' in line: - continue - if 'hlterror' in line: - continue - raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), - 'Found this error message in the logfile {0}: {1}'.format(self._logFileName, line)) - except (OSError, IOError) as e: - raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_LOGERROR'), - 'Exception raised while attempting to scan logfile {0}: {1}'.format(self._logFileName, e)) - - -## @brief Archive transform - use tar -class archiveExecutor(scriptExecutor): - - def preExecute(self, input = set(), output = set()): - # Set the correct command for execution - self._cmd = [self._exe, '-c', '-v',] - if 'compressionType' in self.conf.argdict.keys(): - if self.conf.argdict['compressionType'] == 'gzip': - self._cmd.append('-z') - elif self.conf.argdict['compressionType'] == 'bzip2': - self._cmd.append('-j') - elif self.conf.argdict['compressionType'] == 'none': - pass - self._cmd.extend(['-f', self.conf.argdict['outputArchFile'].value[0]]) - self._cmd.extend(self.conf.argdict['inputDataFile'].value) - - super(archiveExecutor, self).preExecute(input=input, output=output) - diff --git a/Tools/PyJobTransforms/python/trfExe.py b/Tools/PyJobTransforms/python/trfExe.py index 685b38e39271ea37aa9abe1dc5d8383dc4345721..ccbcd5312dbfb0c0b1dd7abad97f138278ecee9f 100755 --- a/Tools/PyJobTransforms/python/trfExe.py +++ b/Tools/PyJobTransforms/python/trfExe.py @@ -5,7 +5,7 @@ # @brief Transform execution functions # @details Standard transform executors # @author atlas-comp-transforms-dev@cern.ch -# @version $Id: trfExe.py 665892 2015-05-08 14:54:36Z graemes $ +# @version $Id: trfExe.py 679938 2015-07-02 22:09:59Z graemes $ import copy import json @@ -19,8 +19,6 @@ import subprocess import sys import time -from xml.etree import ElementTree - import logging msg = logging.getLogger(__name__) @@ -28,6 +26,7 @@ from PyJobTransforms.trfJobOptions import JobOptionsTemplate from PyJobTransforms.trfUtils import asetupReport, unpackDBRelease, setupDBRelease, cvmfsDBReleaseCheck, forceToAlphaNum, releaseIsOlderThan, ValgrindCommand from PyJobTransforms.trfExitCodes import trfExit from PyJobTransforms.trfLogger import stdLogLevels +from PyJobTransforms.trfMPTools import detectAthenaMPProcs, athenaMPOutputHandler import PyJobTransforms.trfExceptions as trfExceptions @@ -164,6 +163,9 @@ class transformExecutor(object): # enabled). 
self._exeStart = self._exeStop = None self._memStats = {} + self._eventCount = None + self._athenaMP = None + self._dbMonitor = None ## Now define properties for these data members @@ -323,7 +325,18 @@ class transformExecutor(object): @property def memStats(self): return self._memStats + + @property + def eventCount(self): + return self._eventCount + + @property + def athenaMP(self): + return self._athenaMP + @property + def dbMonitor(self): + return self._dbMonitor def preExecute(self, input = set(), output = set()): msg.info('Preexecute for %s' % self._name) @@ -587,7 +600,7 @@ class scriptExecutor(transformExecutor): while (not mem_proc.poll()) and countWait < 10: time.sleep(0.1) countWait += 1 - except OSError: + except OSError, UnboundLocalError: pass @@ -633,6 +646,7 @@ class scriptExecutor(transformExecutor): else: checkcount=trfValidation.eventMatch(self) checkcount.decide() + self._eventCount = checkcount.eventCount msg.info('Event counting for substep {0} passed'.format(self.name)) @@ -646,7 +660,9 @@ class athenaExecutor(scriptExecutor): # @param trf Parent transform # @param skeletonFile athena skeleton job options file (optionally this can be a list of skeletons # that will be given to athena.py in order); can be set to @c None to disable writing job options - # files at all + # files at all + # @param inputDataTypeCountCheck List of input datatypes to apply preExecute event count checks to; + # default is @c None, which means check all inputs # @param exe Athena execution script # @param exeArgs Transform argument names whose value is passed to athena # @param substep The athena substep this executor represents (alias for the name) @@ -670,13 +686,12 @@ class athenaExecutor(scriptExecutor): # that a string can be interpreted at runtime; @c literalRunargs allows the direct insertion of arbitary python # snippets into the runArgs file. def __init__(self, name = 'athena', trf = None, conf = None, skeletonFile = 'PyJobTransforms/skeleton.dummy.py', inData = set(), - outData = set(), exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True, + outData = set(), inputDataTypeCountCheck = None, exe = 'athena.py', exeArgs = ['athenaopts'], substep = None, inputEventTest = True, perfMonFile = None, tryDropAndReload = True, extraRunargs = {}, runtimeRunargs = {}, literalRunargs = [], dataArgs = [], checkEventCount = False, errorMaskFiles = None, manualDataDictionary = None, memMonitor = True): self._substep = forceToAlphaNum(substep) - self._athenaMP = None # As yet unknown; N.B. this flag is used for AthenaMP version 2+. 
For AthenaMP-I it is set to False self._inputEventTest = inputEventTest self._tryDropAndReload = tryDropAndReload self._extraRunargs = extraRunargs @@ -684,6 +699,7 @@ class athenaExecutor(scriptExecutor): self._literalRunargs = literalRunargs self._dataArgs = dataArgs self._errorMaskFiles = errorMaskFiles + self._inputDataTypeCountCheck = inputDataTypeCountCheck if perfMonFile: self._perfMonFile = None @@ -703,12 +719,18 @@ class athenaExecutor(scriptExecutor): # Setup JO templates if self._skeleton is not None: - self._jobOptionsTemplate = JobOptionsTemplate(exe = self, version = '$Id: trfExe.py 665892 2015-05-08 14:54:36Z graemes $') + self._jobOptionsTemplate = JobOptionsTemplate(exe = self, version = '$Id: trfExe.py 679938 2015-07-02 22:09:59Z graemes $') else: self._jobOptionsTemplate = None - + @property + def inputDataTypeCountCheck(self): + return self._inputDataTypeCountCheck + @inputDataTypeCountCheck.setter + def inputDataTypeCountCheck(self, value): + self._inputDataTypeCountCheck = value + @property def substep(self): return self._substep @@ -716,13 +738,55 @@ class athenaExecutor(scriptExecutor): def preExecute(self, input = set(), output = set()): msg.debug('Preparing for execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output)) - # Try to detect AthenaMP mode - # The first flag indicates if the transform needs to handle the AthenaMP merging (i.e., AthenaMP v2) - # The first flag is set true in order to disable the --drop-and-reload option because AthenaMP v1 - # cannot handle it - self._athenaMP, self._athenaMPv1 = self._detectAthenaMP() + # Check we actually have events to process! + inputEvents = 0 + dt = "" + if self._inputDataTypeCountCheck is None: + self._inputDataTypeCountCheck = input + for dataType in self._inputDataTypeCountCheck: + thisInputEvents = self.conf.dataDictionary[dataType].nentries + if thisInputEvents > inputEvents: + inputEvents = thisInputEvents + dt = dataType + + # Now take into account skipEvents and maxEvents + if ('skipEvents' in self.conf.argdict and + self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) is not None): + mySkipEvents = self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) + else: + mySkipEvents = 0 - # And if this is athenaMP, then set some options for workers and output file report + if ('maxEvents' in self.conf.argdict and + self.conf.argdict['maxEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) is not None): + myMaxEvents = self.conf.argdict['maxEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) + else: + myMaxEvents = -1 + + # Any events to process...? 
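
# Aside: a small worked example of the event bookkeeping in this block (made-up numbers):
# with inputEvents=1000, skipEvents=100 and maxEvents=500 there are still events to process
# (100 < 1000) and the substep expects to see min(1000 - 100, 500) = 500 events.
inputEvents, mySkipEvents, myMaxEvents = 1000, 100, 500
if myMaxEvents != -1:
    expectedEvents = min(inputEvents - mySkipEvents, myMaxEvents)
else:
    expectedEvents = inputEvents - mySkipEvents
print(expectedEvents)  # 500
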
+ if (self._inputEventTest and mySkipEvents > 0 and mySkipEvents >= inputEvents): + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_NOEVENTS'), + 'No events to process: {0} (skipEvents) >= {1} (inputEvents of {2}'.format(mySkipEvents, inputEvents, dt)) + + # Expected events to process + if (myMaxEvents != -1): + expectedEvents = min(inputEvents-mySkipEvents, myMaxEvents) + else: + expectedEvents = inputEvents-mySkipEvents + + # Try to detect AthenaMP mode and number of workers + if self.conf._disableMP: + self._athenaMP = 0 + else: + self._athenaMP = detectAthenaMPProcs(self.conf.argdict) + + # Small hack to detect cases where there are so few events that it's not worthwhile running in MP mode + # which also avoids issues with zero sized files + if expectedEvents < self._athenaMP: + msg.info("Disabling AthenaMP as number of input events to process is too low ({0} events for {1} workers)".format(expectedEvents, self._athenaMP)) + self.conf._disableMP = True + self._athenaMP = 0 + + # And if this is (still) athenaMP, then set some options for workers and output file report if self._athenaMP: self._athenaMPWorkerTopDir = 'athenaMP-workers-{0}-{1}'.format(self._name, self._substep) self._athenaMPFileReport = 'athenaMP-outputs-{0}-{1}'.format(self._name, self._substep) @@ -739,19 +803,6 @@ class athenaExecutor(scriptExecutor): self._athenaMPWorkerTopDir = self._athenaMPFileReport = None - # Check we actually have events to process! - if (self._inputEventTest and 'skipEvents' in self.conf.argdict and - self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) is not None): - msg.debug('Will test for events to process') - for dataType in input: - inputEvents = self.conf.dataDictionary[dataType].nentries - msg.debug('Got {0} events for {1}'.format(inputEvents, dataType)) - if not isinstance(inputEvents, (int, long)): - msg.warning('Are input events countable? 
Got nevents={0} so disabling event count check for this input'.format(inputEvents)) - elif self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor) >= inputEvents: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_NOEVENTS'), - 'No events to process: {0} (skipEvents) >= {1} (inputEvents of {2}'.format(self.conf.argdict['skipEvents'].returnMyValue(name=self._name, substep=self._substep, first=self.conf.firstExecutor), inputEvents, dataType)) - ## Write the skeleton file and prep athena if self._skeleton is not None: inputFiles = dict() @@ -834,68 +885,15 @@ class athenaExecutor(scriptExecutor): # If this was an athenaMP run then we need to update output files if self._athenaMP: - if path.exists(self._athenaMPFileReport): - try: - try: - outputFileArgs = [ self.conf.dataDictionary[dataType] for dataType in self._output ] - except KeyError, e: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC'), - 'Failed to find output file argument instances for outputs {0} in {1}'.format(self.outData, self.name)) - mpOutputs = ElementTree.ElementTree() - mpOutputs.parse(self._athenaMPFileReport) - for filesElement in mpOutputs.getroot().getiterator(tag='Files'): - msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib)) - originalArg = None - originalName = filesElement.attrib['OriginalName'] - for fileArg in outputFileArgs: - if fileArg.value[0] == originalName: - originalArg = fileArg - break - if originalArg is None: - msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(originalName)) - continue - msg.debug('Found matching argument {0}'.format(originalArg)) - fileNameList = [] - for fileElement in filesElement.getiterator(tag='File'): - msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib)) - fileNameList.append(fileElement.attrib['name']) - # Now update argument with the new name list and reset metadata - originalArg.multipleOK = True - originalArg.value = fileNameList - originalArg.originalName = originalName - msg.debug('Argument {0} value now {1}'.format(originalArg, originalArg.value)) - # Merge? - if originalArg.io is 'output' and len(originalArg.value) > 1: - msg.debug('{0} files {1} are candidates for smart merging'.format(originalArg.name, originalArg.value)) - self._smartMerge(originalArg) - except Exception, e: - msg.error('Exception thrown when processing athenaMP outputs report {0}: {1}'.format(self._athenaMPFileReport, e)) - msg.error('Validation is now very likely to fail') - raise - else: - msg.warning('AthenaMP run was set to True, but no outputs file was found') - - - if 'TXT_JIVEXMLTGZ' in self.conf.dataDictionary.keys(): - #tgzipping JiveXML files - targetTGZName = self.conf.dataDictionary['TXT_JIVEXMLTGZ'].value[0] - if os.path.exists(targetTGZName): - os.remove(targetTGZName) - - import tarfile - fNameRE = re.compile("JiveXML\_\d+\_\d+.xml") - - # force gz compression - tar = tarfile.open(targetTGZName, "w:gz") - for fName in os.listdir('.'): - matches = fNameRE.findall(fName) - if len(matches) > 0: - if fNameRE.findall(fName)[0] == fName: - msg.info('adding %s to %s' % (fName, targetTGZName)) - tar.add(fName) - - tar.close() - msg.info('JiveXML compression: %s has been written and closed.' 
% (targetTGZName)) + outputDataDictionary = dict([ (dataType, self.conf.dataDictionary[dataType]) for dataType in self._output ]) + athenaMPOutputHandler(self._athenaMPFileReport, self._athenaMPWorkerTopDir, outputDataDictionary, self._athenaMP) + for dataType in self._output: + if self.conf.dataDictionary[dataType].io == "output" and len(self.conf.dataDictionary[dataType].value) > 1: + self._smartMerge(self.conf.dataDictionary[dataType]) + + if 'TXT_JIVEXMLTGZ' in self.conf.dataDictionary: + self._targzipJiveXML() + def validate(self): self._hasValidated = True @@ -928,6 +926,8 @@ class athenaExecutor(scriptExecutor): msg.info('Scanning logfile {0} for errors'.format(self._logFileName)) self._logScan = trfValidation.athenaLogFileReport(logfile = self._logFileName, ignoreList = ignorePatterns) worstError = self._logScan.worstError() + self._dbMonitor = self._logScan.dbMonitor() + # In general we add the error message to the exit message, but if it's too long then don't do # that and just say look in the jobReport @@ -965,53 +965,6 @@ class athenaExecutor(scriptExecutor): msg.info('Executor {0} has validated successfully'.format(self.name)) self._isValidated = True - - ## @brief Detect if AthenaMP is being used for this execution step - # @details Check environment and athena options - # Note that the special config option @c disableMP is used as an override - # so that we do not utilise AthenaMP for smart merging - # @return Tuple of two booleans: first is true if AthenaMPv2 is enabled, second is true - # if AthenaMPv1 is enabled - def _detectAthenaMP(self): - if self.conf._disableMP: - msg.debug('Executor configuration specified disabling AthenaMP') - return False, False - - try: - # First try and detect if any AthenaMP has been enabled - if 'ATHENA_PROC_NUMBER' in os.environ and (int(os.environ['ATHENA_PROC_NUMBER']) is not 0): - msg.info('Detected non-zero ATHENA_PROC_NUMBER ({0}) - setting athenaMP=True flag'.format(os.environ['ATHENA_PROC_NUMBER'])) - athenaMPEnabled = True - elif 'athenaopts' in self.conf.argdict and len([opt for opt in self.conf.argdict['athenaopts'].value if '--nprocs' in opt]) > 0: - msg.info('Detected --nprocs argument for athena - setting athenaMP=True flag') - athenaMPEnabled = True - else: - athenaMPEnabled = False - - # If AthenaMP has not been enabled, we don't care about the version - if not athenaMPEnabled: - msg.info('No AthenaMP options found - assuming normal athena run') - return False, False - - # Now need to see if we're running with AthenaMP v1 or v2. In v1 AthenaMP - # handles all special merging and setup, so we ignore it. In v2 the - # transform takes an active role in smart merging and job setup. 
- # We signal AthenaMPv1 by returning False, True; v2 by True, False - from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps - if 'Version' in dir(AthenaMPJobProps.AthenaMPFlags): - if AthenaMPJobProps.AthenaMPFlags.Version == 1: - msg.info("AthenaMP properties indicates version 1 - no special AthenaMP processing will be done") - return False, True - elif releaseIsOlderThan(17, 7): - msg.info("Release is older than 17.7, so assuming AthenaMP version 1 - no special AthenaMP processing will be done") - return False, True - return True, False - - except ValueError: - msg.error('Could not understand ATHENA_PROC_NUMBER environment variable (int conversion failed)') - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Invalid ATHENA_PROC_NUMBER environment variable') - - ## @brief Prepare the correct command line to be used to invoke athena def _prepAthenaCommandLine(self): ## Start building up the command line @@ -1052,9 +1005,7 @@ class athenaExecutor(scriptExecutor): ## Add --drop-and-reload if possible (and allowed!) if self._tryDropAndReload: - if self._athenaMPv1: - msg.info('Disabling "--drop-and-reload" because the job is configured to use AthenaMP v1') - elif 'valgrind' in self.conf._argdict and self.conf._argdict['valgrind'].value is True: + if 'valgrind' in self.conf._argdict and self.conf._argdict['valgrind'].value is True: msg.info('Disabling "--drop-and-reload" because the job is configured to use Valgrind') elif 'athenaopts' in self.conf.argdict: athenaConfigRelatedOpts = ['--config-only','--drop-and-reload','--drop-configuration','--keep-configuration'] @@ -1122,7 +1073,7 @@ class athenaExecutor(scriptExecutor): print >>wrapper, 'DATAPATH={dbroot}:$DATAPATH'.format(dbroot = dbroot) if self.conf._disableMP: print >>wrapper, "# AthenaMP explicitly disabled for this executor" - print >>wrapper, "unset ATHENA_PROC_NUMBER" + print >>wrapper, "export ATHENA_PROC_NUMBER=0" if self._envUpdate.len > 0: print >>wrapper, "# Customised environment" for envSetting in self._envUpdate.values: @@ -1184,7 +1135,6 @@ class athenaExecutor(scriptExecutor): def _smartMerge(self, fileArg): ## @note Produce a list of merge jobs - this is a list of lists # @todo This should be configurable! - # @note Value is set very low for now for testing ## @note only file arguments which support selfMerge() can be merged if 'selfMerge' not in dir(fileArg): @@ -1194,8 +1144,7 @@ class athenaExecutor(scriptExecutor): if fileArg.mergeTargetSize == 0: msg.info('Files in {0} will not be merged as target size is set to 0)'.format(fileArg.name)) return - - + mergeCandidates = [list()] currentMergeSize = 0 for fname in fileArg.value: @@ -1238,6 +1187,29 @@ class athenaExecutor(scriptExecutor): ## We want to parallelise this part! fileArg.selfMerge(output=mergeName, inputs=mergeGroup, argdict=self.conf.argdict) + + def _targzipJiveXML(self): + #tgzipping JiveXML files + targetTGZName = self.conf.dataDictionary['TXT_JIVEXMLTGZ'].value[0] + if os.path.exists(targetTGZName): + os.remove(targetTGZName) + + import tarfile + fNameRE = re.compile("JiveXML\_\d+\_\d+.xml") + + # force gz compression + tar = tarfile.open(targetTGZName, "w:gz") + for fName in os.listdir('.'): + matches = fNameRE.findall(fName) + if len(matches) > 0: + if fNameRE.findall(fName)[0] == fName: + msg.info('adding %s to %s' % (fName, targetTGZName)) + tar.add(fName) + + tar.close() + msg.info('JiveXML compression: %s has been written and closed.' 
% (targetTGZName)) + + ## @brief Athena executor where failure is not consisered fatal class optionalAthenaExecutor(athenaExecutor): @@ -1494,31 +1466,39 @@ class NTUPMergeExecutor(scriptExecutor): super(NTUPMergeExecutor, self).preExecute(input=input, output=output) -## @brief Specalise the athena executor to deal with the BS merge oddity of excluding empty DRAWs + +## @brief Specalise the script executor to deal with the BS merge oddity of excluding empty DRAWs class bsMergeExecutor(scriptExecutor): def preExecute(self, input = set(), output = set()): + self._inputBS = list(input)[0] + self._outputBS = list(output)[0] self._maskedFiles = [] - if 'BS' in self.conf.argdict and 'maskEmptyInputs' in self.conf.argdict and self.conf.argdict['maskEmptyInputs'].value is True: + self._useStubFile = False + if 'maskEmptyInputs' in self.conf.argdict and self.conf.argdict['maskEmptyInputs'].value is True: eventfullFiles = [] - for fname in self.conf.dataDictionary['BS'].value: - nEvents = self.conf.dataDictionary['BS'].getSingleMetadata(fname, 'nentries') + for fname in self.conf.dataDictionary[self._inputBS].value: + nEvents = self.conf.dataDictionary[self._inputBS].getSingleMetadata(fname, 'nentries') msg.debug('Found {0} events in file {1}'.format(nEvents, fname)) if isinstance(nEvents, int) and nEvents > 0: eventfullFiles.append(fname) - self._maskedFiles = list(set(self.conf.dataDictionary['BS'].value) - set(eventfullFiles)) + self._maskedFiles = list(set(self.conf.dataDictionary[self._inputBS].value) - set(eventfullFiles)) if len(self._maskedFiles) > 0: msg.info('The following input files are masked because they have 0 events: {0}'.format(' '.join(self._maskedFiles))) - if len(self.conf.dataDictionary['BS'].value) == 0: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), - 'All input files had zero events - aborting BS merge') + if len(eventfullFiles) == 0: + if 'emptyStubFile' in self.conf.argdict and path.exists(self.conf.argdict['emptyStubFile'].value): + self._useStubFile = True + msg.info("All input files are empty - will use stub file {0} as output".format(self.conf.argdict['emptyStubFile'].value)) + else: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'All input files had zero events - aborting BS merge') # Write the list of input files to a text file, so that testMergedFiles can swallow it - self._mergeBSFileList = '{0}.list'.format(self._exe) - self._mergeBSLogfile = '{0}.out'.format(self._exe) + self._mergeBSFileList = '{0}.list'.format(self._name) + self._mergeBSLogfile = '{0}.out'.format(self._name) try: with open(self._mergeBSFileList, 'w') as BSFileList: - for fname in self.conf.dataDictionary['BS'].value: + for fname in self.conf.dataDictionary[self._inputBS].value: if fname not in self._maskedFiles: print >>BSFileList, fname except (IOError, OSError) as e: @@ -1527,7 +1507,7 @@ class bsMergeExecutor(scriptExecutor): raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), errMsg) # Hope that we were given a correct filename... 
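
# Aside: the underlying merge program writes its output as <base>._0001.data, so the logic
# below either strips that suffix from the requested name (no rename needed) or renames the
# produced file afterwards; a tiny illustration with a made-up file name:
requested = 'data15_13TeV.merged.RAW._0001.data'
if requested.endswith('._0001.data'):
    outputBase, doRename = requested.split('._0001.data')[0], False
else:
    outputBase, doRename = requested, True
print('{0} doRename={1}'.format(outputBase, doRename))  # data15_13TeV.merged.RAW doRename=False
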
- self._outputFilename = self.conf.dataDictionary['BS_MRG'].value[0] + self._outputFilename = self.conf.dataDictionary[self._outputBS].value[0] if self._outputFilename.endswith('._0001.data'): self._doRename = False self._outputFilename = self._outputFilename.split('._0001.data')[0] @@ -1544,17 +1524,32 @@ class bsMergeExecutor(scriptExecutor): self._cmd = [self._exe, self._mergeBSFileList, '0', self._outputFilename] super(bsMergeExecutor, self).preExecute(input=input, output=output) - + + def execute(self): + if self._useStubFile: + # Need to fake execution! + self._exeStart = os.times() + msg.info("Using stub file for empty BS output - execution is fake") + if self._outputFilename != self.conf.argdict['emptyStubFile'].value: + os.rename(self.conf.argdict['emptyStubFile'].value, self._outputFilename) + self._memMonitor = False + self._hasExecuted = True + self._rc = 0 + self._exeStop = os.times() + else: + super(bsMergeExecutor, self).execute() def postExecute(self): - if self._doRename: + if self._useStubFile: + pass + elif self._doRename: self._expectedOutput = self._outputFilename + '._0001.data' - msg.info('Renaming {0} to {1}'.format(self._expectedOutput, self.conf.dataDictionary['BS_MRG'].value[0])) + msg.info('Renaming {0} to {1}'.format(self._expectedOutput, self.conf.dataDictionary[self._outputBS].value[0])) try: - os.rename(self._outputFilename + '._0001.data', self.conf.dataDictionary['BS_MRG'].value[0]) + os.rename(self._outputFilename + '._0001.data', self.conf.dataDictionary[self._outputBS].value[0]) except OSError, e: raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), - 'Exception raised when renaming {0} to {1}: {2}'.format(self._outputFilename, self.conf.dataDictionary['BS_MRG'].value[0], e)) + 'Exception raised when renaming {0} to {1}: {2}'.format(self._outputFilename, self.conf.dataDictionary[self._outputBS].value[0], e)) super(bsMergeExecutor, self).postExecute() diff --git a/Tools/PyJobTransforms/python/trfFileUtils.py b/Tools/PyJobTransforms/python/trfFileUtils.py index 5f5319d136e5716f9ead64aa8ff9658a15b80bc9..c2eb5013cfa4f5c25b178b4d152aea0a5f836b0b 100644 --- a/Tools/PyJobTransforms/python/trfFileUtils.py +++ b/Tools/PyJobTransforms/python/trfFileUtils.py @@ -4,7 +4,7 @@ # @brief Transform utilities to deal with files. # @details Mainly used by argFile class. 
# @author atlas-comp-transforms-dev@cern.ch -# @version $Id: trfFileUtils.py 665892 2015-05-08 14:54:36Z graemes $ +# @version $Id: trfFileUtils.py 675949 2015-06-17 12:12:29Z graemes $ # @todo make functions timelimited import logging @@ -118,7 +118,11 @@ def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys from subprocess import CalledProcessError if filetype == 'POOL': - from PyUtils.AthFileLite import AthPoolFile as AthFileLite + # retrieve GUID and nentries without runMiniAthena subprocess + if set(retrieveKeys) == set(inpFileInterestingKeys): + from PyUtils.AthFileLite import AthInpFile as AthFileLite + else: + from PyUtils.AthFileLite import AthPoolFile as AthFileLite elif filetype == 'BS': from PyUtils.AthFileLite import AthBSFile as AthFileLite elif filetype == 'TAG': @@ -178,7 +182,7 @@ def AthenaLiteFileInfo(filename, filetype, retrieveKeys = athFileInterestingKeys metaDict[filename][key] = meta[key] except KeyError: msg.warning('Missing key in athFile info: {0}'.format(key)) - except (CalledProcessError, ValueError, AssertionError) as e: + except (CalledProcessError, ValueError, AssertionError, ReferenceError) as e: msg.error('Problem in getting AthFile metadata for {0}'.format(filename)) return None msg.debug('Returning {0}'.format(metaDict)) diff --git a/Tools/PyJobTransforms/python/trfMPTools.py b/Tools/PyJobTransforms/python/trfMPTools.py new file mode 100644 index 0000000000000000000000000000000000000000..ca97386eddf94da470a85a06b9a0bdfd700011fd --- /dev/null +++ b/Tools/PyJobTransforms/python/trfMPTools.py @@ -0,0 +1,155 @@ +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @package PyJobTransforms.trfMPTools +# +# @brief Utilities for handling AthenaMP jobs +# @author atlas-comp-transforms-dev@cern.ch +# @version $Id: trfMPTools.py 677748 2015-06-23 20:29:35Z graemes $ +# + +__version__ = '$Revision' + +import os +import os.path as path +import re + +import logging +msg = logging.getLogger(__name__) + +from xml.etree import ElementTree + +from PyJobTransforms.trfExitCodes import trfExit + +import PyJobTransforms.trfExceptions as trfExceptions + +## @brief Detect if AthenaMP has been requested +# @param argdict Argument dictionary, used to access athenaopts for the job +# @return Integer with the number of processes, N.B. 
0 means non-MP serial mode +def detectAthenaMPProcs(argdict = {}): + athenaMPProcs = 0 + + # Try and detect if any AthenaMP has been enabled + try: + if 'ATHENA_PROC_NUMBER' in os.environ: + athenaMPProcs = int(os.environ['ATHENA_PROC_NUMBER']) + if athenaMPProcs < 0: + raise ValueError("ATHENA_PROC_NUMBER value was less than zero") + msg.info('AthenaMP detected from ATHENA_PROC_NUMBER with {0} workers'.format(athenaMPProcs)) + elif 'athenaopts' in argdict: + procArg = [opt.replace("--nprocs=", "") for opt in argdict['athenaopts'].value if '--nprocs' in opt] + if len(procArg) == 0: + athenaMPProcs = 0 + elif len(procArg) == 1: + athenaMPProcs = int(procArg[0]) + if athenaMPProcs < 0: + raise ValueError("--nprocs was set to a value less than zero") + else: + raise ValueError("--nprocs was set more than once in 'athenaopts'") + msg.info('AthenaMP detected from "nprocs" setting with {0} workers'.format(athenaMPProcs)) + except ValueError, errMsg: + myError = 'Problem discovering AthenaMP setup: {0}'.format(errMsg) + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError) + + return athenaMPProcs + +## @brief Handle AthenaMP outputs, updating argFile instances to real +# @param athenaMPFileReport XML file with outputs that AthenaMP knew about +# @param athenaMPWorkerTopDir Subdirectory with AthenaMP worker run directories +# @param dataDictionary This substep's data dictionary, allowing all files to be +# updated to the appropriate AthenaMP worker files +# @param athenaMPworkers Number of AthenaMP workers +# @return @c None; side effect is the update of the @c dataDictionary +def athenaMPOutputHandler(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary, athenaMPworkers): + msg.debug("MP output handler called for report {0} and workers in {1}, data types {2}".format(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary.keys())) + outputHasBeenHandled = dict([ (dataType, False) for dataType in dataDictionary.keys() if dataDictionary[dataType] ]) + + # First, see what AthenaMP told us + mpOutputs = ElementTree.ElementTree() + try: + mpOutputs.parse(athenaMPFileReport) + except IOError: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Missing AthenaMP outputs file {0} (probably athena crashed)".format(athenaMPFileReport)) + for filesElement in mpOutputs.getroot().getiterator(tag='Files'): + msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib)) + originalArg = None + originalName = filesElement.attrib['OriginalName'] + for dataType, fileArg in dataDictionary.iteritems(): + if fileArg.value[0] == originalName: + originalArg = fileArg + outputHasBeenHandled[dataType] = True + break + if originalArg is None: + msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(originalName)) + continue + + msg.debug('Found matching argument {0}'.format(originalArg)) + fileNameList = [] + for fileElement in filesElement.getiterator(tag='File'): + msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib)) + fileNameList.append(path.relpath(fileElement.attrib['name'])) + + athenaMPoutputsLinkAndUpdate(fileNameList, fileArg) + + # Now look for additional outputs that have not yet been handled + if len([ dataType for dataType in outputHasBeenHandled if outputHasBeenHandled[dataType] is False]): + # OK, we have something we need to search for; cache the dirwalk here + MPdirWalk = [ dirEntry for dirEntry in 
os.walk(athenaMPWorkerTopDir) ] + + for dataType, fileArg in dataDictionary.iteritems(): + if outputHasBeenHandled[dataType]: + continue + if fileArg.io is "input": + continue + msg.info("Searching MP worker directories for {0}".format(dataType)) + originalName = fileArg.value[0] + fileNameList = [] + for entry in MPdirWalk: + if "evt_count" in entry[0]: + continue + # N.B. AthenaMP may have made the output name unique for us, so + # we need to treat the original name as a prefix + possibleOutputs = [ fname for fname in entry[2] if fname.startswith(originalName) ] + if len(possibleOutputs) == 0: + continue + elif len(possibleOutputs) == 1: + fileNameList.append(path.join(entry[0], possibleOutputs[0])) + else: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found multiple matching outputs for datatype {0} in {1}: {2}".format(dataType, entry[0], possibleOutputs)) + if len(fileNameList) != athenaMPworkers: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found {0} output files for {1}, expected {2} (found: {3})".format(len(fileNameList), dataType, athenaMPworkers, fileNameList)) + + # Found expected number of files - good! + athenaMPoutputsLinkAndUpdate(fileNameList, fileArg) + + +def athenaMPoutputsLinkAndUpdate(newFullFilenames, fileArg): + originalName = fileArg.value[0] + # Do we need to append worker dir suffixes? + linkedNameList = [] + uniqueSimpleNames = set([path.basename(fname) for fname in newFullFilenames]) + if len(uniqueSimpleNames) != len(newFullFilenames): + for fname in newFullFilenames: + simpleName = path.basename(fname) + workerIndexMatch = re.search(r'/worker_(\d+)/', fname) + if workerIndexMatch: + workerIndex = workerIndexMatch.group(1) + else: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found output file ({0}) not in an AthenaMP worker directory".format(fname)) + simpleName += "._{0:03d}".format(int(workerIndex)) + linkedNameList.append(simpleName) + else: + linkedNameList = [path.basename(fname) for fname in newFullFilenames] + + for linkname, fname in zip(linkedNameList, newFullFilenames): + try: + if path.lexists(linkname): + os.unlink(linkname) + os.symlink(fname, linkname) + except OSError, e: + raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Failed to link {0} to {1}: {2}".format(fname, linkname, e)) + + fileArg.multipleOK = True + fileArg.value = linkedNameList + fileArg.originalName = originalName + msg.debug('MP output argument updated to {0}'.format(fileArg)) + diff --git a/Tools/PyJobTransforms/python/trfReports.py b/Tools/PyJobTransforms/python/trfReports.py index 775b700a1886a67a560834dfc58cc5021876a402..ab7c88d9d76ea58f52ef9840c770d01a71a40f91 100644 --- a/Tools/PyJobTransforms/python/trfReports.py +++ b/Tools/PyJobTransforms/python/trfReports.py @@ -6,10 +6,10 @@ # @details Classes whose instance encapsulates transform reports # at different levels, such as file, executor, transform # @author atlas-comp-transforms-dev@cern.ch -# @version $Id: trfReports.py 665892 2015-05-08 14:54:36Z graemes $ +# @version $Id: trfReports.py 681299 2015-07-08 11:28:37Z lerrenst $ # -__version__ = '$Revision: 665892 $' +__version__ = '$Revision: 681299 $' import cPickle as pickle import json @@ -105,7 +105,7 @@ class trfReport(object): class trfJobReport(trfReport): ## @brief This is the version counter for transform job reports # any changes to the format @b must be reflected by 
incrementing this - _reportVersion = '1.0.1' + _reportVersion = '1.0.5' _metadataKeyMap = {'AMIConfig': 'AMI', } _maxMsgLen = 256 _truncationMsg = " (truncated)" @@ -171,6 +171,13 @@ class trfJobReport(trfReport): 'wallTime': exe.wallTime,} if exe.memStats: exeResource['memory'] = exe.memStats + if exe.eventCount: + exeResource['nevents'] = exe.eventCount + if exe.athenaMP: + exeResource['mpworkers'] = exe.athenaMP + if exe.dbMonitor: + exeResource['dbData'] = exe.dbMonitor['bytes'] + exeResource['dbTime'] = exe.dbMonitor['time'] myDict['resource']['executor'][executionStep['name']] = exeResource # Resource consumption @@ -400,6 +407,7 @@ class trfFileReport(object): # move metadata to subFile dict, before it can be compressed metaData = self._fileArg._fileMetadata for fileName in metaData.keys(): + msg.info("Examining metadata for file {0}".format(fileName)) if basenameReport == False: searchFileName = fileName else: @@ -562,6 +570,11 @@ class machineReport(object): pass except Exception, e: msg.warning('Unexpected error while parsing /proc/cpuinfo: {0}'.format(e)) + try: + with open('/etc/machinefeatures/hs06') as hs: + machine['hepspec'] = hs.readlines()[0].strip() + except IOError, e: + msg.info('Could not find HEPSPEC: {0}'.format(e)) return machine diff --git a/Tools/PyJobTransforms/python/trfValidation.py b/Tools/PyJobTransforms/python/trfValidation.py index 17d0b87e3ff4e6958f5d67b9a9b2e594a999f64d..7f244845babe3e1b480fca432601e1697a82ef0c 100644 --- a/Tools/PyJobTransforms/python/trfValidation.py +++ b/Tools/PyJobTransforms/python/trfValidation.py @@ -6,7 +6,7 @@ # @details Contains validation classes controlling how the transforms # will validate jobs they run. # @author atlas-comp-transforms-dev@cern.ch -# @version $Id: trfValidation.py 665892 2015-05-08 14:54:36Z graemes $ +# @version $Id: trfValidation.py 679715 2015-07-02 11:28:03Z lerrenst $ # @note Old validation dictionary shows usefully different options: # <tt>self.validationOptions = {'testIfEmpty' : True, 'testIfNoEvents' : False, 'testIfExists' : True, # 'testIfCorrupt' : True, 'testCountEvents' : True, 'extraValidation' : False, @@ -258,6 +258,8 @@ class athenaLogFileReport(logFileReport): self._errorDetails[level] = [] # Format: # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N} + self._dbbytes = None + self._dbtime = None def scanLogFile(self, resetReport=False): @@ -291,11 +293,11 @@ class athenaLogFileReport(logFileReport): continue # Add the G4 exceptipon parsers if 'G4Exception-START' in line > -1: - msg.warning('Detected G4 9.4 exception report - activating G4 exception grabber') - self.g4ExceptionParser(myGen, line, lineCounter) + msg.warning('Detected G4 exception report - activating G4 exception grabber') + self.g4ExceptionParser(myGen, line, lineCounter, 40) continue if '*** G4Exception' in line > -1: - msg.warning('Detected G4 exception report - activating G4 exception grabber') + msg.warning('Detected G4 9.4 exception report - activating G4 exception grabber') self.g494ExceptionParser(myGen, line, lineCounter) continue # Add the python exception parser @@ -365,6 +367,15 @@ class athenaLogFileReport(logFileReport): else: # Overcounted pass + if self._dbbytes is None and 'Total payload read from COOL' in fields['message']: + msg.debug("Found COOL payload information at line {0}".format(line)) + a = re.match(r'(\D+)(?P<bytes>\d+)(\D+)(?P<time>\d+[.]?\d*)(\D+)', fields['message']) + self._dbbytes = int(a.group('bytes')) + self._dbtime = float(a.group('time')) + + ## Return data volume and 
time spent to retrieve information from the database + def dbMonitor(self): + return {'bytes' : self._dbbytes, 'time' : self._dbtime} ## Return the worst error found in the logfile (first error of the most serious type) def worstError(self): @@ -449,7 +460,7 @@ class athenaLogFileReport(logFileReport): self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1}) - def g4ExceptionParser(self, lineGenerator, firstline, firstLineCount): + def g4ExceptionParser(self, lineGenerator, firstline, firstLineCount, g4ExceptionLineDepth): g4Report = firstline g4lines = 1 for line, linecounter in lineGenerator: @@ -458,7 +469,7 @@ class athenaLogFileReport(logFileReport): # Test for the closing string if 'G4Exception-END' in line: break - if g4lines >= 25: + if g4lines >= g4ExceptionLineDepth: msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount)) break @@ -682,6 +693,7 @@ class eventMatch(object): # All data is taken from _trf dict def __init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False): self._executor = executor + self._eventCount = None ## @note This double dictionary is formed of INPUT data, then a dictionary of the expected # event counts from different output data types. If there is no exact match for the output @@ -702,7 +714,7 @@ class eventMatch(object): self._eventCountConf['EVNT_Stopped'] = {'HITS': simEventEff} self._eventCountConf['HITS'] = {'RDO':"match", "HITS_MRG":"match", 'HITS_FILT': simEventEff, "RDO_FILT": "filter"} self._eventCountConf['BS'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "BS_MRG":"match", 'DESD_*': "filter"} - self._eventCountConf['RDO*'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "RDO_MRG":"match"} + self._eventCountConf['RDO*'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "RDO_MRG":"match", "RDO_TRIG":"match"} self._eventCountConf['ESD'] = {'ESD_MRG': "match", 'AOD':"match", 'DESD_*':"filter", 'DAOD_*':"filter", 'NTUP_*':"filter"} self._eventCountConf['AOD'] = {'AOD_MRG' : "match", 'TAG':"match", "NTUP_*":"filter", "DAOD_*":"filter", 'NTUP_*':"filter"} self._eventCountConf['AOD_MRG'] = {'TAG':"match"} @@ -728,6 +740,10 @@ class eventMatch(object): if self._executor is not None: self.configureCheck(override=False) + @property + def eventCount(self): + return self._eventCount + ## @brief Setup the parameters needed to define particular checks # @param override If set then configure the checks using this dictionary, which needs # to have keys @c inEventDict, @c outEventDict, @c skipEvents, @c maxEvents, @c evAccEff @@ -881,5 +897,5 @@ class eventMatch(object): else: raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'), 'Unrecognised event count configuration for {inData} to {outData}: "{conf}" is not known'.format(inData=inData, outData=outData, conf=checkConf)) - + self._eventCount = expectedEvents return True diff --git a/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py b/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py index c9ba568e3b346085cc800d3974d8ef93529dbf3d..9415d48b7a272fbcbc84c3802695bb4fd8c1b1de 100755 --- a/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py +++ b/Tools/PyJobTransforms/scripts/ValidateFiles_tf.py @@ -30,7 +30,6 @@ def main(): trf=getTransform() trf.parseCmdLineArgs(sys.argv[1:]) - print '+++', trf._argdict trf.execute() trf.generateReport() sys.exit(trf.exitCode) diff --git 
a/Tools/PyJobTransforms/scripts/makeTrfJSONSignatures.py b/Tools/PyJobTransforms/scripts/makeTrfJSONSignatures.py index d76efbf31925207f243b4ee74f1caa50bbd38484..be696cc278260e28e69097e41191838eb65bb9f8 100755 --- a/Tools/PyJobTransforms/scripts/makeTrfJSONSignatures.py +++ b/Tools/PyJobTransforms/scripts/makeTrfJSONSignatures.py @@ -2,7 +2,7 @@ # Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration # -# $Id: makeTrfJSONSignatures.py 659213 2015-04-07 13:20:39Z graemes $ +# $Id: makeTrfJSONSignatures.py 682012 2015-07-10 07:44:44Z graemes $ # ## @brief Dump job transform arguments into a file, JSON encoded @@ -25,7 +25,7 @@ def _getTransformsFromPATH(): transforms = [ entry for entry in os.listdir(path_element) if entry.endswith("_tf.py") ] for trf in transforms: if trf not in transforms_list: - if trf in ('Athena_tf.py', 'Cat_tf.py', 'Echo_tf.py', 'ExeWrap_tf.py', 'Sleep_tf.py', 'RAWtoESD_tf.py', 'ESDtoAOD_tf.py'): + if trf in ('Athena_tf.py', 'Cat_tf.py', 'Echo_tf.py', 'ExeWrap_tf.py', 'Sleep_tf.py', 'RAWtoESD_tf.py', 'ESDtoAOD_tf.py', 'beamSpotT0_Vertex_tf.py'): # Test transforms - not for production continue transforms_list.append(trf) diff --git a/Tools/PyJobTransforms/share/atlas_error_mask.db b/Tools/PyJobTransforms/share/atlas_error_mask.db index a5232afdf01183e6487d7ba8c67e4eea10a16766..2dee9086974b99d41419d1b80c0720af89fa71f4 100755 --- a/Tools/PyJobTransforms/share/atlas_error_mask.db +++ b/Tools/PyJobTransforms/share/atlas_error_mask.db @@ -1,4 +1,4 @@ -# $Id: atlas_error_mask.db 576626 2013-12-21 23:29:31Z graemes $ +# $Id: atlas_error_mask.db 670822 2015-05-29 09:20:49Z graemes $ # Error mask file for new transforms # Each line contains 3 fields, separated by commas: @@ -63,7 +63,7 @@ THistSvc.sysFinali, FATAL, Standard std::exception is caught ,ERROR, \(poolDb\): ,ERROR, \(pool\): ,ERROR, - G4Navigator::ComputeStep\(\) -,,.*ERROR OCCURED DURING A SECONDARY SCATTER AND WAS +,,.*OCCURED DURING A SECONDARY SCATTER AND WAS THistSvc , ERROR, already registered an object with identifier .* ,ERROR, MuonDetectorManager::getCscReadoutElement stNameindex out of range .* muFast_\S+ , ERROR, CSM for Subsystem \d+, MrodId \d+, LinkId \d+ not found diff --git a/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py b/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py index cdeb52174b6544d64d6542c79eee90aa1a6b41ef..775f68bbee8a2443e4372277cf6609612f93e3ad 100755 --- a/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py +++ b/Tools/PyJobTransforms/test/test_Reco_AthenaMP_tf.py @@ -18,23 +18,14 @@ from PyJobTransforms.trfLogger import msg from PyJobTransforms.trfReports import pyJobReportToFileDict from PyJobTransforms.trfUtils import releaseIsOlderThan -sourceFiles = '/afs/cern.ch/work/g/graemes/ddm/pmb/data12_8TeV.00209109.physics_JetTauEtmiss.merge.RAW._lb0186._SFO-1._0001.1' - class Reco_tfAthenaMPtest(unittest.TestCase): def test_runReco_tf(self): - inputs = glob.glob(sourceFiles) - self.assertEqual(len(inputs), 1) - cmd = ['Reco_tf.py', '--inputBSFile'] - cmd.extend(inputs) - cmd.extend(['--autoConfiguration', 'everything']) - cmd.extend(['--outputESDFile', 'my.ESD.pool.root']) - cmd.extend(['--outputAODFile', 'my.AOD.pool.root']) - cmd.extend(['--outputHISTFile', 'my.HIST.root']) - cmd.extend(['--preExec', 'rec.doTrigger=False']) # This is temporary while trigger doesn't work in r19 - cmd.extend(['--outputTAGFile', 'my.TAG.pool.root']) - cmd.extend(['--maxEvents', '24',]) + cmd = ['Reco_tf.py'] + cmd.extend(['--AMI', 'q222']) + cmd.extend(['--maxEvents', '24']) 
cmd.append('--athenaopts=--nprocs=4') + cmd.extend(['--athenaMPMergeTargetSize', 'ESD:0']) msg.info('Will run this transform: {0}'.format(cmd)) p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) @@ -54,14 +45,8 @@ class Reco_tfAthenaMPtest(unittest.TestCase): self.assertTrue('ESD' in dataDict.keys()) self.assertTrue('AOD' in dataDict.keys()) self.assertTrue('HIST' in dataDict.keys()) - self.assertEqual(dataDict['ESD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') - self.assertEqual(dataDict['ESD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') - self.assertEqual(dataDict['ESD']['subFiles'][0]['beam_type'], ['collisions']) - self.assertEqual(dataDict['ESD']['subFiles'][0]['name'], 'my.ESD.pool.root') - self.assertEqual(dataDict['AOD']['subFiles'][0]['geometry'], 'ATLAS-GEO-20-00-01') - self.assertEqual(dataDict['AOD']['subFiles'][0]['conditions_tag'], 'COMCOND-BLKPA-006-01') - self.assertEqual(dataDict['AOD']['subFiles'][0]['beam_type'], ['collisions']) - self.assertEqual(dataDict['AOD']['subFiles'][0]['name'], 'my.AOD.pool.root') + self.assertTrue(len(dataDict['ESD']['subFiles']), 4) + self.assertEqual(dataDict['AOD']['subFiles'][0]['nentries'], 24) self.assertEqual(dataDict['HIST']['subFiles'][0]['nentries'], 24) if __name__ == '__main__': diff --git a/Tools/PyJobTransforms/test/test_Reco_q222_tf.py b/Tools/PyJobTransforms/test/test_Reco_q222_tf.py new file mode 100755 index 0000000000000000000000000000000000000000..5a8314b7865758d8a245c4fe15454868646c00c1 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_Reco_q222_tf.py @@ -0,0 +1,45 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +# Run a Reco job and test key metadata in the output +# + +import glob +import json +import subprocess +import os +import os.path +import sys +import unittest + +from PyJobTransforms.trfLogger import msg +from PyJobTransforms.trfReports import pyJobReportToFileDict + +class Reco_tftest(unittest.TestCase): + + def test_runReco_q222_tf(self): + cmd = ['Reco_tf.py'] + cmd.extend(['--AMI', 'q222']) + cmd.extend(['--maxEvents', '2']) + msg.info('Will run this transform: {0}'.format(cmd)) + p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) + while p.poll() is None: + line = p.stdout.readline() + sys.stdout.write(line) + # Hoover up remaining buffered output lines + for line in p.stdout: + sys.stdout.write(line) + self.assertEqual(p.returncode, 0) + + # Now load metadata and test a few important values + with open('jobReport.json') as jr: + md = json.load(jr) + self.assertEqual(isinstance(md, dict), True) + self.assertTrue('resource' in md.keys()) + self.assertEqual(md['resource']['executor']['AODtoTAG']['eventCount'], 2) + self.assertEqual(md['resource']['executor']['ESDtoAOD']['eventCount'], 2) + self.assertEqual(md['resource']['executor']['RAWtoESD']['eventCount'], 2) + +if __name__ == '__main__': + unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfArgClasses.py b/Tools/PyJobTransforms/test/test_trfArgClasses.py index 930f6177cf6fe5f43837cb879e207212d9918480..a6f6c32edda2cf4bfaa5aad4a5f154be3d5ae5eb 100755 --- a/Tools/PyJobTransforms/test/test_trfArgClasses.py +++ b/Tools/PyJobTransforms/test/test_trfArgClasses.py @@ -5,7 +5,7 @@ ## @Package test_trfArgClasses.py # @brief Unittests for test_trfArgClasses.py # @author graeme.andrew.stewart@cern.ch -# @version $Id: test_trfArgClasses.py 648031 2015-02-19 09:57:41Z 
graemes $ +# @version $Id: test_trfArgClasses.py 678200 2015-06-25 10:29:34Z graemes $ # @note Tests of ATLAS specific file formats moved to test_trfArgClassesATLAS.py import unittest @@ -16,6 +16,7 @@ msg = logging.getLogger(__name__) # Allowable to import * from the package for which we are the test suite from PyJobTransforms.trfArgClasses import * from PyJobTransforms.trfUtils import cmpMetadata +from PyJobTransforms.trfAMI import getAMIClient ## Unittests for this module class trfArgumentTests(unittest.TestCase): @@ -373,6 +374,51 @@ class argSteeringTests(unittest.TestCase): self.assertEqual(steer.value, {'RAWtoESD': [('in', '-', 'RDO'), ('in', '+', 'RDO_TRIG'), ('in', '-', 'BS')]}) +class argConditionsTests(unittest.TestCase): + def setup(self): + # store getAMIClient and fake a new one + # fake client with execute function, returning fake dom_object + class client(object): + def execute(self, cmd, format): + return dom() + # fake dom_object with get_rows function + class dom(object): + def get_rows(self): + return [{'globalTag': 'TEST'}] + def getFakeClient(): + return client + amiClient = argSubstepConditions.value.fset.func_globals['getAMIClient'] + argSubstepConditions.value.fset.func_globals['getAMIClient'] = getFakeClient() + return amiClient + + def test_condStr(self): + client = self.setup() + cond1 = argSubstepConditions('CurrentMC') + cond2 = argSubstepConditions('step:CurrentMC') + cond3 = argSubstepConditions('step:OFLCOND-RUN12-SDR-28') + self.assertEqual(cond1.value, {'all': 'TEST'}) + self.assertEqual(cond2.value, {'step': 'TEST'}) + self.assertEqual(cond3.value, {'step': 'OFLCOND-RUN12-SDR-28'}) + self.tear_down(client) + def test_condList(self): + client = self.setup() + cond = argSubstepConditions(['CurrentMC', 'one:something']) + self.assertEqual(cond.value, {'all': 'TEST', 'one': 'something'}) + self.tear_down(client) + def test_condDict(self): + client = self.setup() + d1 = {'all': 'other', 'one': 'CurrentMC'} + d2 = {'one': 'apples', 'two': 'bananas'} + cond1 = argSubstepConditions(d1) + cond2 = argSubstepConditions(d2) + self.assertEqual(cond1.value, {'all': 'other', 'one': 'TEST'}) + self.assertEqual(cond2.value, {'one': 'apples', 'two': 'bananas'}) + self.tear_down(client) + def tear_down(self, client): + argSubstepConditions.value.fset.func_globals['getAMIClient'] = client + + + class argFileTests(unittest.TestCase): def setUp(self): # In python 2.7 support for multiple 'with' expressions becomes available @@ -382,6 +428,8 @@ class argFileTests(unittest.TestCase): print >>f2, 'Short file 2' with open('file3', 'w') as f3: print >>f3, 80*'-', 'Long file 3', 80*'-' + with open('file4', 'w') as f4: + print >>f4, 'Short file number 4' with open('prefix.prodsysfile._001.suffix.1', 'w') as f1: print >>f1, 'This is prodsys test file 1' with open('prefix.prodsysfile._002.suffix.4', 'w') as f2: @@ -395,7 +443,7 @@ class argFileTests(unittest.TestCase): self.myManualGUIDMultiFile = argFile(['file1', 'file2', 'file3'], io='input', guid={'file1': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'file2': '1368D295-27C6-4A92-8187-704C2A6A5864', 'file3': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9'}) def tearDown(self): - for f in ('file1', 'file2', 'file3', 'prefix.prodsysfile._001.suffix.1', 'prefix.prodsysfile._002.suffix.4', + for f in ('file1', 'file2', 'file3', 'file4', 'prefix.prodsysfile._001.suffix.1', 'prefix.prodsysfile._002.suffix.4', 'prefix.prodsysfile._003.suffix.7'): try: os.unlink(f) @@ -416,8 +464,7 @@ class argFileTests(unittest.TestCase): def test_argFileGlob(self): 
myInput = argFile('file?', io='input') - # Use set comparison as glob order is not guaranteed - self.assertEqual(set(myInput.value), set(['file1', 'file2', 'file3'])) + self.assertEqual(myInput.value, ['file1', 'file2', 'file3', 'file4']) def test_argFileProdsysGlob(self): myInput = argFile('prefix.prodsysfile._[001,002,003].suffix', io='input') @@ -428,10 +475,6 @@ class argFileTests(unittest.TestCase): self.assertEqual(self.mySingleFile.io, 'output') self.assertEqual(self.myMultiFile.io, 'input') - def test_argFileDataset(self): - withDataset = argFile('fakeDatasetName#file1') - self.assertEqual(withDataset.dataset, 'fakeDatasetName') - def test_argFileMetadata(self): # Can't test all metadata directly now we added a GUID generator self.assertTrue(cmpMetadata(self.mySingleFile.getMetadata(), {'file1': {'_exists': True, 'file_guid': 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094', 'integrity': True, 'file_size': 20}})) @@ -458,6 +501,60 @@ class argFileTests(unittest.TestCase): def test_argFileSetMetadata(self): self.myMultiFile._setMetadata(files=None, metadataKeys={'file_size': 1234567, '_exists': True}) self.assertEqual(self.myMultiFile.getSingleMetadata('file1', 'file_size'), 1234567) + + + ## @brief Tests of Tier-0 dictionary setup + def test_argFileTier0Dict(self): + t0file = argFile([{'dsn': 'testDataset', 'lfn': 'file1', 'guid': 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094', 'events': 12, 'checksum': 'ad:fcd568ea'}, + {'dsn': 'testDataset', 'lfn': 'file2', 'guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'events': 13, 'checksum': 'ad:24ea1d6f'}, + {'dsn': 'testDataset', 'lfn': 'file3', 'guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'events': 14, 'checksum': 'ad:e4ddac3b'}, + {'dsn': 'testDataset', 'lfn': 'file4', 'guid': 'CAB26113-8CEC-405A-BEDB-9B1CFDD96DA8', 'events': 15, 'checksum': 'ad:65262635', 'extra': 'something'}, + ], + io='input') + self.assertEqual(t0file.value, ['file1', 'file2', 'file3', 'file4']) + self.assertEqual(t0file.getSingleMetadata(fname='file1', metadataKey='file_guid', populate=False), 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094') + self.assertEqual(t0file.getSingleMetadata(fname='file2', metadataKey='nentries', populate=False), 13) + self.assertEqual(t0file.getSingleMetadata(fname='file1', metadataKey='nentries', populate=False), 12) + self.assertEqual(t0file.getSingleMetadata(fname='file3', metadataKey='checksum', populate=False), 'ad:e4ddac3b') + self.assertEqual(t0file.dataset, 'testDataset') + + ## @brief Test that we fail when lfn is missing or dataset is inconsistent + def test_argFileTier0DictBad(self): + t0file = argFile(None, io="input") + self.assertRaises(trfExceptions.TransformArgException, t0file.valueSetter, + [{'dsn': 'testDataset', 'guid': 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094', 'events': 12, 'checksum': 'ad:fcd568ea'}, + {'dsn': 'testDataset', 'lfn': 'file4', 'guid': 'CAB26113-8CEC-405A-BEDB-9B1CFDD96DA8', 'events': 15, 'checksum': 'ad:65262635'}, + ]) + self.assertRaises(trfExceptions.TransformArgException, t0file.valueSetter, + [{'dsn': 'testDataset', 'lfn': 'file1', 'guid': 'D6F5F632-4EA6-4EA6-9A78-9CF59C247094', 'events': 12, 'checksum': 'ad:fcd568ea'}, + {'dsn': 'brokenDataset', 'lfn': 'file2', 'guid': '05ACBDD0-5F5F-4E2E-974A-BBF4F4FE6F0B', 'events': 13, 'checksum': 'ad:24ea1d6f'}, + {'dsn': 'testDataset', 'lfn': 'file3', 'guid': 'F5BA4602-6CA7-4111-B3C7-CB06486B30D9', 'events': 14, 'checksum': 'ad:e4ddac3b'}, + {'dsn': 'testDataset', 'lfn': 'file4', 'guid': 'CAB26113-8CEC-405A-BEDB-9B1CFDD96DA8', 'events': 15, 'checksum': 'ad:65262635'}, + ]) + + ## 
@brief Tier-0 dsn#lfn notation test + def test_argFileTier0withDSNString(self): + withDataset = argFile('fakeDatasetName#file1') + self.assertEqual(withDataset.dataset, 'fakeDatasetName') + self.assertEqual(withDataset.value, ['file1']) + + def test_argFileTier0withDSNArray(self): + t0file = argFile(["testDataset#file1", "testDataset#file2", "testDataset#file3", "testDataset#file4"], io="input") + self.assertEqual(t0file.dataset, "testDataset") + self.assertEqual(t0file.value, ['file1', 'file2', 'file3', 'file4']) + + def test_argFileTier0withDSNBad(self): + t0file = argFile(None, io="input") + # Cannot have inconsistent dataset names + self.assertRaises(trfExceptions.TransformArgException, t0file.valueSetter, + ["testDataset#file1", "testDataset#file2", "testDataset#file3", "testDatasetDifferent#file4"]) + # Cannot have some files with missing dataset names + self.assertRaises(trfExceptions.TransformArgException, t0file.valueSetter, + ["testDataset#file1", "testDataset#file2", "testDataset#file3", "file4"]) + # Cannot change dataset name + t0file.dataset = "originalDSN" + self.assertRaises(trfExceptions.TransformArgException, t0file.valueSetter, + ["testDataset#file1", "testDataset#file2", "testDataset#file3", "testDataset#file4"]) if __name__ == '__main__': unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfArgClassesATLAS.py b/Tools/PyJobTransforms/test/test_trfArgClassesATLAS.py index dabddc8541cd370a259c2764cb5ba4c7cef52671..1432676009529a2e28a5bb72ec86217336f14052 100755 --- a/Tools/PyJobTransforms/test/test_trfArgClassesATLAS.py +++ b/Tools/PyJobTransforms/test/test_trfArgClassesATLAS.py @@ -5,7 +5,7 @@ ## @Package test_trfArgClasses.py # @brief Unittests for test_trfArgClasses.py # @author graeme.andrew.stewart@cern.ch -# @version $Id: test_trfArgClassesATLAS.py 628398 2014-11-14 12:54:56Z graemes $ +# @version $Id: test_trfArgClassesATLAS.py 667158 2015-05-14 16:14:07Z vanyash $ # @note Tests of ATLAS specific file formats (that thus rely on other # parts of Athena) live here @@ -17,6 +17,9 @@ from PyJobTransforms.trfLogger import msg # Allowable to import * from the package for which we are the test suite from PyJobTransforms.trfArgClasses import * +# Stripped down key list for files which are inputs +from PyJobTransforms.trfFileUtils import inpFileInterestingKeys + class argFileEOSTests(unittest.TestCase): def test_SimExpansion(self): hitsInputs = argFile('root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._000[029,200].pool.root.1,root://eosatlas//eos/atlas/atlascerngroupdisk/proj-sit/digitization/RTT/mc12a/mc12_8TeV.119995.Pythia8_A2MSTW2008LO_minbias_inelastic_low.merge.HITS.e1119_s1469_s1471/HITS.743321._000[614,817].pool.root.5', io='input') @@ -48,6 +51,8 @@ class argPOOLFiles(unittest.TestCase): testFile = '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root' os.stat(testFile) esdFile = argPOOLFile(testFile, io = 'input', type='esd') + self.assertEqual(esdFile.getMetadata(metadataKeys = tuple(inpFileInterestingKeys)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'file_type': 'pool', 'file_guid': '0CABA22E-9096-E011-AE25-0030487C8CE6', 'nentries': 10L, 'file_size': 17033381}}) + esdFile = argPOOLFile(testFile, io = 'output', type='esd') 
self.assertEqual(esdFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'_exists': True, 'run_number': [182796L], 'beam_energy': [3500000.0], 'file_type': 'pool', 'AODFixVersion': '', 'file_size': 17033381L, 'geometry': 'ATLAS-GEO-16-00-01', 'file_guid': '0CABA22E-9096-E011-AE25-0030487C8CE6', 'beam_type': ['collisions'], 'lumi_block': [300L], 'conditions_tag': 'COMCOND-BLKPST-004-00', 'integrity': True, 'nentries': 10L}}) self.assertEqual(esdFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.ESD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'nentries': 10}}) self.assertEqual(esdFile.prodsysDescription['type'],'file') @@ -60,6 +65,8 @@ class argPOOLFiles(unittest.TestCase): testFile = '/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root' os.stat(testFile) aodFile = argPOOLFile(testFile, io = 'input', type='aod') + self.assertEqual(aodFile.getMetadata(metadataKeys = tuple(inpFileInterestingKeys)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'file_type': 'pool', 'file_guid': '6E1FE6F0-9096-E011-9DDA-0030487C8CE6', 'nentries': 10L, 'file_size': 4673269}}) + aodFile = argPOOLFile(testFile, io = 'output', type='aod') self.assertEqual(aodFile.getMetadata(), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'_exists': True, 'run_number': [182796L], 'beam_energy': [3500000.0], 'file_type': 'pool', 'AODFixVersion': '', 'file_size': 4673269L, 'geometry': 'ATLAS-GEO-16-00-01', 'file_guid': '6E1FE6F0-9096-E011-9DDA-0030487C8CE6', 'beam_type': ['collisions'], 'lumi_block': [300L], 'conditions_tag': 'COMCOND-BLKPST-004-00', 'integrity': True, 'nentries': 10L}}) self.assertEqual(aodFile.getMetadata(metadataKeys = ('nentries',)), {'/afs/cern.ch/atlas/offline/test/data11_7TeV.00182796.physics_JetTauEtmiss.merge.AOD._lb0300._SFO-10._0001.1.10evts.16.6.6.4.pool.root': {'nentries': 10}}) self.assertEqual(aodFile.prodsysDescription['type'],'file') diff --git a/Tools/PyJobTransforms/test/test_trfArgs.py b/Tools/PyJobTransforms/test/test_trfArgs.py index 58bb53733d18ca92924e8e41021d8f7c5095955a..376e458da5c703efb1c2bc0484cde80b82f52ebe 100755 --- a/Tools/PyJobTransforms/test/test_trfArgs.py +++ b/Tools/PyJobTransforms/test/test_trfArgs.py @@ -5,10 +5,12 @@ ## @Package test_trfArgs.py # @brief Unittests for trfArgs.py # @author maddocks.harvey@gmail.com, graeme.andrew.stewart@cern.ch -# @version $Id: test_trfArgs.py 623865 2014-10-24 12:39:44Z graemes $ +# @version $Id: test_trfArgs.py 682012 2015-07-10 07:44:44Z graemes $ import argparse +import json import os +import subprocess import unittest import logging @@ -105,5 +107,25 @@ class trfFloatArgsUnitTests(unittest.TestCase): self.assertRaises(SystemExit, tf.parseCmdLineArgs, ['--mugVolume', 'LOL']) +class configureFromJSON(unittest.TestCase): + def setUp(self): + with open('argdict.json', 'w') as argdict: + argdict.write('''{"conditionsTag": { "all": "CONDBR2-BLKPA-2015-05" }, "geometryVersion": { "all": "ATLAS-R2-2015-03-01-00" }, "preExec": { "athena": [ "print 'Python says hi!'" ] }, "skipEvents": { "first": 10 }, "testFloat": 4.67, "testInt": 5 }''') + + def tearDown(self): + for f in 'argdict.json', 
'rewrite.json': + try: + os.unlink(f) + except OSError: + pass + + def test_configFromJSON(self): + cmd = ['Athena_tf.py', '--argJSON', 'argdict.json', '--dumpJSON', 'rewrite.json'] + self.assertEqual(subprocess.call(cmd), 0) + self.maxDiff = None + with open('rewrite.json') as rewritten_json: + rewrite = json.load(rewritten_json) + self.assertEqual(rewrite, {u'argJSON': u'argdict.json', u"conditionsTag": { u"all": u"CONDBR2-BLKPA-2015-05" }, u"geometryVersion": { u"all": u"ATLAS-R2-2015-03-01-00" }, u"preExec": { u"athena": [ u"print 'Python says hi!'" ] }, u"skipEvents": { u"first": 10 }, u"testFloat": 4.67, u"testInt": 5 }) + if __name__ == '__main__': unittest.main() diff --git a/Tools/PyJobTransforms/test/test_trfMPTools.py b/Tools/PyJobTransforms/test/test_trfMPTools.py new file mode 100755 index 0000000000000000000000000000000000000000..f842c4b7ad74c6589f88227093e35b8734c5b9e0 --- /dev/null +++ b/Tools/PyJobTransforms/test/test_trfMPTools.py @@ -0,0 +1,162 @@ +#! /usr/bin/env python + +# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration + +## @Package test_trfMPTools.py +# @brief Unittests for trfMPTools.py +# @author graeme.andrew.stewart@cern.ch +# @version $Id: test_trfMPTools.py 677748 2015-06-23 20:29:35Z graemes $ + +import os +import subprocess +import unittest + +import logging +msg = logging.getLogger(__name__) + +# Allowable to import * from the package for which we are the test suite +from PyJobTransforms.trfMPTools import * +from PyJobTransforms.trfArgClasses import argList, argFile + +import PyJobTransforms.trfExceptions as trfExceptions + + +## Unit tests +class AthenaMPProcTests(unittest.TestCase): + def setUp(self): + os.environ.pop("ATHENA_PROC_NUMBER", "") + + def test_noMP(self): + self.assertEqual(detectAthenaMPProcs(), 0) + + def test_noMPwithArgdict(self): + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['some', 'random', 'values'])} + self.assertEqual(detectAthenaMPProcs(argdict), 0) + + def test_MPfromEnv(self): + os.environ["ATHENA_PROC_NUMBER"] = "8" + self.assertEqual(detectAthenaMPProcs(), 8) + + def test_MPfromEnvEmpty(self): + os.environ["ATHENA_PROC_NUMBER"] = "0" + self.assertEqual(detectAthenaMPProcs(), 0) + + def test_MPBadfromEnv(self): + os.environ["ATHENA_PROC_NUMBER"] = "-1" + self.assertRaises(trfExceptions.TransformExecutionException, detectAthenaMPProcs) + os.environ["ATHENA_PROC_NUMBER"] = "notAnInt" + self.assertRaises(trfExceptions.TransformExecutionException, detectAthenaMPProcs) + + def test_MPfromArgdict(self): + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=8', 'random', 'values'])} + self.assertEqual(detectAthenaMPProcs(argdict), 8) + + def test_MPfromArgdictEmpty(self): + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=0', 'random', 'values'])} + self.assertEqual(detectAthenaMPProcs(argdict), 0) + + def test_MPfromArgdictBad(self): + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=-4', 'random', 'values'])} + self.assertRaises(trfExceptions.TransformExecutionException, detectAthenaMPProcs, argdict) + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=notAnInt', 'random', 'values'])} + self.assertRaises(trfExceptions.TransformExecutionException, detectAthenaMPProcs, argdict) + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=4', '--nprocs=8', 'values'])} + 
self.assertRaises(trfExceptions.TransformExecutionException, detectAthenaMPProcs, argdict) + + def test_MPfromBoth(self): + # Env should have priority + os.environ["ATHENA_PROC_NUMBER"] = "4" + argdict={'movealong': argList('nothing to see here'), 'athenaopts': argList(['--nprocs=2', 'random', 'values'])} + self.assertEqual(detectAthenaMPProcs(argdict), 4) + + +class AthenaMPOutputParseTests(unittest.TestCase): + def setUp(self): + # Gah, this is a pest to setup! + cwd = os.getcwd() + outputStruct = [('.', [], ['data15_13TeV.00267167.physics_Main.merge.RAW._lb0176._SFO-1._0001.1']), ('athenaMP-workers-RAWtoESD-r2e', ['worker_3', 'worker_7', 'worker_4', 'worker_5', 'worker_2', 'worker_6', 'evt_counter', 'worker_1', 'worker_0'], []), ('athenaMP-workers-RAWtoESD-r2e/worker_3', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_7', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_4', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_5', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_2', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 
'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_6', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/evt_counter', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_1', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out']), ('athenaMP-workers-RAWtoESD-r2e/worker_0', [], ['tmp.HIST_ESD_INT', 'AthenaMP.log', 'data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002', 'eventLoopHeartBeat.txt', 'ntuple_RAWtoESD.pmon.gz', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002', 'FileManagerLog', 'PoolFileCatalog.xml.BAK', 'PoolFileCatalog.xml', 'data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002', 'data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002', 'AtRanluxGenSvc.out'])] + for delement in outputStruct: + try: + os.mkdir(delement[0]) + except OSError: + pass + for subdir in delement[1]: + try: + os.mkdir(os.path.join(delement[0], subdir)) + except OSError: + pass + for fname in delement[2]: + open(os.path.join(delement[0], fname), "w") + + with open("athenaMP-outputs-RAWtoESD-r2e", "w") as mpoutput: + print >>mpoutput, """<?xml version="1.0" encoding="utf-8"?> +<athenaFileReport> + <Files OriginalName="data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002"> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_0/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" 
mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_1/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_2/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_3/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_4/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_5/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_6/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + <File description="POOL" mode="WRITE|CREATE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_7/data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002" shared="True" technology="ROOT"/> + </Files> + <Files OriginalName="tmp.HIST_ESD_INT"> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_0/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_1/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_2/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_3/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_4/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_5/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_6/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + <File description="HIST" mode="WRITE" name="{CWD}/athenaMP-workers-RAWtoESD-r2e/worker_7/tmp.HIST_ESD_INT" shared="False" technology="ROOT"/> + </Files> +</athenaFileReport> +""".format(CWD=os.getcwd()) + + def tearDown(self): + subprocess.call(['rm -fr athenaMP* data15* tmp.*'], shell=True) + + def test_basicMPoutputs(self): + dataDict = {'BS': argFile("data15_13TeV.00267167.physics_Main.merge.RAW._lb0176._SFO-1._0001.1", io="input"), + 'ESD': argFile("data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002"), + 'HIST_ESD_INT': argFile("tmp.HIST_ESD_INT"), + 'DRAW_EMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002"), + 'DRAW_EGZ': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002"), + 'DRAW_TAUMUH': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002"), + 'DRAW_ZMUMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002"),} + self.assertEqual(athenaMPOutputHandler("athenaMP-outputs-RAWtoESD-r2e", 
"athenaMP-workers-RAWtoESD-r2e", dataDict, 8), None) + + def test_missingMPoutputs(self): + dataDict = {'ESD': argFile("data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002"), + 'HIST_ESD_INT': argFile("tmp.HIST_ESD_INT"), + 'DRAW_EMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002"), + 'DRAW_EGZ': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002"), + 'DRAW_TAUMUH': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002"), + 'DRAW_NOTHERE': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_NOTHERE.f594._lb0176._SFO-1._0002"), + 'DRAW_ZMUMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002"),} + self.assertRaises(trfExceptions.TransformExecutionException, athenaMPOutputHandler, "athenaMP-outputs-RAWtoESD-r2e", "athenaMP-workers-RAWtoESD-r2e", dataDict, 8) + + def test_wrongMPoutputs(self): + dataDict = {'ESD': argFile("data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002"), + 'HIST_ESD_INT': argFile("tmp.HIST_ESD_INT"), + 'DRAW_EMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002"), + 'DRAW_EGZ': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002"), + 'DRAW_TAUMUH': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002"), + 'DRAW_NOTHERE': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_NOTHERE.f594._lb0176._SFO-1._0002"), + 'DRAW_ZMUMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002"),} + self.assertRaises(trfExceptions.TransformExecutionException, athenaMPOutputHandler, "athenaMP-outputs-RAWtoESD-r2e", "athenaMP-workers-RAWtoESD-r2e", dataDict, 20) + + def test_wrongMPoutputDir(self): + dataDict = {'ESD': argFile("data15_13TeV.00267167.physics_Main.recon.ESD.f594._lb0176._SFO-1._0002"), + 'HIST_ESD_INT': argFile("tmp.HIST_ESD_INT"), + 'DRAW_EMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EMU.f594._lb0176._SFO-1._0002"), + 'DRAW_EGZ': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_EGZ.f594._lb0176._SFO-1._0002"), + 'DRAW_TAUMUH': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_TAUMUH.f594._lb0176._SFO-1._0002"), + 'DRAW_NOTHERE': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_NOTHERE.f594._lb0176._SFO-1._0002"), + 'DRAW_ZMUMU': argFile("data15_13TeV.00267167.physics_Main.recon.DRAW_ZMUMU.f594._lb0176._SFO-1._0002"),} + self.assertRaises(trfExceptions.TransformExecutionException, athenaMPOutputHandler, "athenaMP-outputs-RAWtoESD-r2e-missing", "athenaMP-workers-RAWtoESD-r2e", dataDict, 20) + + +if __name__ == '__main__': + unittest.main()