From 01a9fc950de3c46266ca3d12cc0f9d5426cf67c3 Mon Sep 17 00:00:00 2001 From: Tadej Novak <tadej.novak@cern.ch> Date: Tue, 21 Sep 2021 17:03:57 +0200 Subject: [PATCH] Support appending auxiliary inputs e.g. minbias samples --- .../SimuJobTransforms/python/simTrfArgs.py | 10 +-- ..._Digi_tf_multistep_presampling_CA_vs_CG.sh | 32 ++++++-- Tools/PyJobTransforms/python/transform.py | 27 +++++-- Tools/PyJobTransforms/python/trfArgClasses.py | 80 +++++++++++-------- Tools/PyJobTransforms/python/trfExe.py | 13 ++- Tools/PyJobTransforms/python/trfJobOptions.py | 21 +++-- Tools/PyJobTransforms/python/trfReports.py | 4 + 7 files changed, 126 insertions(+), 61 deletions(-) diff --git a/Simulation/SimuJobTransforms/python/simTrfArgs.py b/Simulation/SimuJobTransforms/python/simTrfArgs.py index 52514eff937d..859c4077eaac 100644 --- a/Simulation/SimuJobTransforms/python/simTrfArgs.py +++ b/Simulation/SimuJobTransforms/python/simTrfArgs.py @@ -75,19 +75,19 @@ def addPileUpTrfArgs(parser): parser.add_argument('--testPileUpConfig', type=argFactory(argBool), help='Calculates the number of background events that will be require for a given pile-up configuration.', group='PileUp') - parser.add_argument('--inputLowPtMinbiasHitsFile','--LowPtMinbiasHitsFile', nargs='+', + parser.add_argument('--inputLowPtMinbiasHitsFile','--LowPtMinbiasHitsFile', nargs='+', action='append', type=argFactory(argHITSFile, io='input', executor=['EVNTtoRDO','HITtoRDO'], auxiliaryFile=True), help='Input HITS file for low pT minimum bias pile-up sub-events', group='PileUp') - parser.add_argument('--inputHighPtMinbiasHitsFile','--HighPtMinbiasHitsFile', nargs='+', + parser.add_argument('--inputHighPtMinbiasHitsFile','--HighPtMinbiasHitsFile', nargs='+', action='append', type=argFactory(argHITSFile, io='input', executor=['EVNTtoRDO','HITtoRDO'], auxiliaryFile=True), help='Input HITS file for high pT minimum bias pile-up sub-events', group='PileUp') - parser.add_argument('--inputCavernHitsFile', '--cavernHitsFile', nargs='+', + parser.add_argument('--inputCavernHitsFile', '--cavernHitsFile', nargs='+', action='append', type=argFactory(argHITSFile, io='input', executor=['EVNTtoRDO','HITtoRDO'], auxiliaryFile=True), help='Input HITS file for cavern background sub-events', group='PileUp') - parser.add_argument('--inputBeamHaloHitsFile', '--beamHaloHitsFile', nargs='+', + parser.add_argument('--inputBeamHaloHitsFile', '--beamHaloHitsFile', nargs='+', action='append', type=argFactory(argHITSFile, io='input', executor=['EVNTtoRDO','HITtoRDO'], auxiliaryFile=True), help='Input HITS file for beam halo sub-events', group='PileUp'), - parser.add_argument('--inputBeamGasHitsFile', '--beamGasHitsFile', nargs='+', + parser.add_argument('--inputBeamGasHitsFile', '--beamGasHitsFile', nargs='+', action='append', type=argFactory(argHITSFile, io='input', executor=['EVNTtoRDO','HITtoRDO'], auxiliaryFile=True), help='Input HITS file for beam gas sub-events', group='PileUp') parser.add_argument('--numberOfLowPtMinBias', diff --git a/Simulation/Tests/DigitizationTests/test/test_Digi_tf_multistep_presampling_CA_vs_CG.sh b/Simulation/Tests/DigitizationTests/test/test_Digi_tf_multistep_presampling_CA_vs_CG.sh index c9fc9428395d..f3193e5dbafe 100755 --- a/Simulation/Tests/DigitizationTests/test/test_Digi_tf_multistep_presampling_CA_vs_CG.sh +++ b/Simulation/Tests/DigitizationTests/test/test_Digi_tf_multistep_presampling_CA_vs_CG.sh @@ -12,8 +12,12 @@ Events=100 DigiOutFileNameCG="multistep_presampling.CG.RDO.pool.root" 
DigiOutFileNameCA="multistep_presampling.CA.RDO.pool.root" HSHitsFile="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/OverlayTests/mc16_13TeV.900149.PG_single_nu_Pt50.simul.HITS.e8307_s3482/HITS.24078104._234467.pool.root.1" -HighPtMinbiasHitsFiles="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.800831.Py8EG_minbias_inelastic_highjetphotonlepton.simul.HITS_FILT.e8341_s3687_s3704/*" -LowPtMinbiasHitsFiles="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.900311.Epos_minbias_inelastic_lowjetphoton.simul.HITS_FILT.e8341_s3687_s3704/*" +HighPtMinbiasHitsFiles1="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.800831.Py8EG_minbias_inelastic_highjetphotonlepton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106512._000149.pool.root.1" +HighPtMinbiasHitsFiles2="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.800831.Py8EG_minbias_inelastic_highjetphotonlepton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106512._000581.pool.root.1" +HighPtMinbiasHitsFiles3="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.800831.Py8EG_minbias_inelastic_highjetphotonlepton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106512._000717.pool.root.1" +LowPtMinbiasHitsFiles1="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.900311.Epos_minbias_inelastic_lowjetphoton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106626._000068.pool.root.1" +LowPtMinbiasHitsFiles2="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.900311.Epos_minbias_inelastic_lowjetphoton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106626._000480.pool.root.1" +LowPtMinbiasHitsFiles3="/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/Tier0ChainTests/mc16_13TeV.900311.Epos_minbias_inelastic_lowjetphoton.simul.HITS_FILT.e8341_s3687_s3704/HITS_FILT.26106626._000574.pool.root.1" # config only @@ -26,8 +30,12 @@ Digi_tf.py \ --digiSteeringConf "StandardSignalOnlyTruth" \ --geometryVersion default:ATLAS-R2-2016-01-00-01 \ --inputHITSFile ${HSHitsFile} \ ---inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles} \ ---inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles1} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles2} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles3} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles1} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles2} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles3} \ --jobNumber 568 \ --maxEvents ${Events} \ --outputRDOFile ${DigiOutFileNameCG} \ @@ -51,8 +59,12 @@ Digi_tf.py \ --digiSteeringConf "StandardSignalOnlyTruth" \ --geometryVersion default:ATLAS-R2-2016-01-00-01 \ --inputHITSFile ${HSHitsFile} \ ---inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles} \ ---inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles1} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles2} \ +--inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles3} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles1} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles2} \ +--inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles3} \ --jobNumber 568 \ --maxEvents ${Events} \ --outputRDOFile ${DigiOutFileNameCG} \ @@ -91,8 +103,12 @@ then --digiSteeringConf "StandardSignalOnlyTruth" \ --geometryVersion default:ATLAS-R2-2016-01-00-01 \ --inputHITSFile ${HSHitsFile} \ - --inputHighPtMinbiasHitsFile 
${HighPtMinbiasHitsFiles} \ - --inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles} \ + --inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles1} \ + --inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles2} \ + --inputHighPtMinbiasHitsFile ${HighPtMinbiasHitsFiles3} \ + --inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles1} \ + --inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles2} \ + --inputLowPtMinbiasHitsFile ${LowPtMinbiasHitsFiles3} \ --jobNumber 568 \ --maxEvents ${Events} \ --outputRDOFile ${DigiOutFileNameCA} \ diff --git a/Tools/PyJobTransforms/python/transform.py b/Tools/PyJobTransforms/python/transform.py index 946bfabe469d..7ef44b7f8004 100644 --- a/Tools/PyJobTransforms/python/transform.py +++ b/Tools/PyJobTransforms/python/transform.py @@ -328,7 +328,11 @@ class transform(object): for k, v in self._argdict.items(): if isinstance(v, argument): v.name = k - + elif isinstance(v, list): + for it in v: + if isinstance(it, argument): + it.name = k + # Now we parsed all arguments, if a pickle/json dump is requested do it here and exit if 'dumpPickle' in self._argdict: msg.info('Now dumping pickled version of command line to {0}'.format(self._argdict['dumpPickle'])) @@ -497,13 +501,20 @@ class transform(object): # Note specifier [A-Za-z0-9_]+? makes this match non-greedy (avoid swallowing the optional 'File' suffix) m = re.match(r'(input|output|tmp)([A-Za-z0-9_]+?)(File)?$', key) # N.B. Protect against taking argunents which are not type argFile - if m and isinstance(value, argFile): - if m.group(1) == 'input': - self._inputData.append(m.group(2)) - else: - self._outputData.append(m.group(2)) - self._dataDictionary[m.group(2)] = value - + if m: + if isinstance(value, argFile): + if m.group(1) == 'input': + self._inputData.append(m.group(2)) + else: + self._outputData.append(m.group(2)) + self._dataDictionary[m.group(2)] = value + elif isinstance(value, list) and value and isinstance(value[0], argFile): + if m.group(1) == 'input': + self._inputData.append(m.group(2)) + else: + self._outputData.append(m.group(2)) + self._dataDictionary[m.group(2)] = value + ## @note If we have no real data then add the pseudo datatype NULL, which allows us to manage # transforms which can run without data if len(self._inputData) == 0: diff --git a/Tools/PyJobTransforms/python/trfArgClasses.py b/Tools/PyJobTransforms/python/trfArgClasses.py index 482dad31beba..1e0170b57012 100644 --- a/Tools/PyJobTransforms/python/trfArgClasses.py +++ b/Tools/PyJobTransforms/python/trfArgClasses.py @@ -514,7 +514,7 @@ class argFile(argList): self._mergeTargetSize = mergeTargetSize self._auxiliaryFile = auxiliaryFile self._originalName = None - + # User setter to get valid value check self.io = io @@ -2488,50 +2488,66 @@ class trfArgParser(argparse.ArgumentParser): def allArgs(self): return list(self._helpString) + # @brief parsing helper + def _parse_list_helper(self, value): + # We build on the value[0] instance as this contains the correct metadata + # and object references for this instance (shallow copying can + # mess up object references and deepcopy thows exceptions!) 
+ newValueObj = value[0] + msg.debug('Started with: %s = %s', type(newValueObj), newValueObj) + if isinstance(value[0], argSubstep): + # Make sure you do not have a reference to the original value - this is a deeper copy + newValues = dictSubstepMerge(value[0].value, {}) + elif isinstance(value[0], list): + if len(value) == 1: + return self._parse_list_helper(value[0]) + msg.debug('Handling a list of arguments for key') + newValues = [] + for v in value: + processedValueObj, processedValues = self._parse_list_helper(v) + processedValueObj.value = processedValues + newValues.append(processedValueObj) + newValueObj = newValues + return newValueObj, newValues + elif isinstance(value[0].value, list): + newValues = value[0].value + elif isinstance(value[0].value, dict): + newValues = value[0].value + else: + newValues = [value[0].value,] + for valueObj in value[1:]: + msg.debug('Value Object: %s = %s', type(valueObj), valueObj) + if isinstance(value[0], argSubstep): + # Special merger for lists attached to substeps + newValues = dictSubstepMerge(newValues, valueObj.value) + elif isinstance(valueObj.value, list): + # General lists are concatenated + newValues.extend(valueObj.value) + elif isinstance(valueObj.value, dict): + # General dictionaries are merged + newValues.update(valueObj.value) + else: + newValues.append(valueObj.value) + return newValueObj, newValues ## @brief Call argument_parser parse_args, then concatenate values # @details Sets-up the standard argparse namespace, then use a special # treatment for lists (arising from nargs='+'), where values # are appropriately concatenated and a single object is returned # @return argument_parser namespace object - def parse_args(self, args = None, namespace = None): + def parse_args(self, args = None, namespace = None): if namespace: - super(trfArgParser, self).parse_args(args = args, namespace = namespace) + super(trfArgParser, self).parse_args(args = args, namespace = namespace) else: namespace = super(trfArgParser, self).parse_args(args = args) for k, v in namespace.__dict__.items(): msg.debug('Treating key %s (%s)', k, v) if isinstance(v, list): - # We build on the v[0] instance as this contains the correct metadata - # and object references for this instance (shallow copying can - # mess up object references and deepcopy thows exceptions!) 
- newValueObj = v[0] - msg.debug('Started with: %s = %s', type(newValueObj), newValueObj) - if isinstance(v[0], argSubstep): - # Make sure you do not have a reference to the original value - this is a deeper copy - newValues = dictSubstepMerge(v[0].value, {}) - elif isinstance(v[0].value, list): - newValues = v[0].value - elif isinstance(v[0].value, dict): - newValues = v[0].value - else: - newValues = [v[0].value,] - for valueObj in v[1:]: - msg.debug('Value Object: %s = %s', type(valueObj), valueObj) - if isinstance(v[0], argSubstep): - # Special merger for lists attached to substeps - newValues = dictSubstepMerge(newValues, valueObj.value) - elif isinstance(valueObj.value, list): - # General lists are concatenated - newValues.extend(valueObj.value) - elif isinstance(valueObj.value, dict): - # General dictionaries are merged - newValues.update(valueObj.value) - else: - newValues.append(valueObj.value) - newValueObj.value = newValues + newValueObj, newValues = self._parse_list_helper(v) + if not isinstance(newValueObj, list): + newValueObj.value = newValues namespace.__dict__[k] = newValueObj - msg.debug('Set to %s', newValueObj.value) + msg.debug('Set to %s', newValues) return namespace diff --git a/Tools/PyJobTransforms/python/trfExe.py b/Tools/PyJobTransforms/python/trfExe.py index fc2b3b2e9ae7..ba3ca11e8b91 100755 --- a/Tools/PyJobTransforms/python/trfExe.py +++ b/Tools/PyJobTransforms/python/trfExe.py @@ -1091,8 +1091,17 @@ class athenaExecutor(scriptExecutor): # See if we have any 'extra' file arguments nameForFiles = commonExecutorStepName(self._name) for dataType, dataArg in self.conf.dataDictionary.items(): - if dataArg.io == 'input' and nameForFiles in dataArg.executor: - inputFiles[dataArg.subtype] = dataArg + if isinstance(dataArg, list) and dataArg: + if self.conf.totalExecutorSteps <= 1: + raise ValueError('Multiple input arguments provided but only running one substep') + if self.conf.totalExecutorSteps != len(dataArg): + raise ValueError(f'{len(dataArg)} input arguments provided but running {self.conf.totalExecutorSteps} substeps') + + if dataArg[self.conf.executorStep].io == 'input' and nameForFiles in dataArg[self.conf.executorStep].executor: + inputFiles[dataArg[self.conf.executorStep].subtype] = dataArg + else: + if dataArg.io == 'input' and nameForFiles in dataArg.executor: + inputFiles[dataArg.subtype] = dataArg msg.debug('Input Files: {0}; Output Files: {1}'.format(inputFiles, outputFiles)) diff --git a/Tools/PyJobTransforms/python/trfJobOptions.py b/Tools/PyJobTransforms/python/trfJobOptions.py index 6e898353908c..c6380a320620 100644 --- a/Tools/PyJobTransforms/python/trfJobOptions.py +++ b/Tools/PyJobTransforms/python/trfJobOptions.py @@ -120,12 +120,21 @@ class JobOptionsTemplate(object): # Now deal with our input and output files print(os.linesep, "# Input data", file=runargsFile) for dataType, dataArg in input.items(): - print('{0}.input{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value), file=runargsFile) - print('{0}.input{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type), file=runargsFile) - # Add the input event count, if we know it - if dataArg.isCached(metadataKeys = ['nentries']): - print('{0}.input{1}FileNentries = {2!r}'.format(self._runArgsName, dataType, dataArg.nentries), file=runargsFile) - print("{0}.{1}FileIO = {2!r}".format(self._runArgsName, dataType, self._exe.conf.dataDictionary[dataType].io), file=runargsFile) + if isinstance(dataArg, list) and dataArg: + dataArgStep = dataArg[self._exe.conf.executorStep] 
+ print('{0}.input{1}File = {2!r}'.format(self._runArgsName, dataType, dataArgStep.value), file=runargsFile) + print('{0}.input{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArgStep.type), file=runargsFile) + # Add the input event count, if we know it + if dataArgStep.isCached(metadataKeys = ['nentries']): + print('{0}.input{1}FileNentries = {2!r}'.format(self._runArgsName, dataType, dataArgStep.nentries), file=runargsFile) + print("{0}.{1}FileIO = {2!r}".format(self._runArgsName, dataType, dataArgStep.io), file=runargsFile) + else: + print('{0}.input{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value), file=runargsFile) + print('{0}.input{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type), file=runargsFile) + # Add the input event count, if we know it + if dataArg.isCached(metadataKeys = ['nentries']): + print('{0}.input{1}FileNentries = {2!r}'.format(self._runArgsName, dataType, dataArg.nentries), file=runargsFile) + print("{0}.{1}FileIO = {2!r}".format(self._runArgsName, dataType, dataArg.io), file=runargsFile) print(os.linesep, "# Output data", file=runargsFile) for dataType, dataArg in output.items(): diff --git a/Tools/PyJobTransforms/python/trfReports.py b/Tools/PyJobTransforms/python/trfReports.py index 08252250d90f..b3d126c6c1e5 100644 --- a/Tools/PyJobTransforms/python/trfReports.py +++ b/Tools/PyJobTransforms/python/trfReports.py @@ -157,6 +157,8 @@ class trfJobReport(trfReport): myDict['files'][fileType] = [] # Should have a dataDictionary, unless something went wrong very early... for dataType, dataArg in self._trf._dataDictionary.items(): + if isinstance(dataArg, list): # Always skip lists from the report (auxiliary files) + continue if dataArg.auxiliaryFile: # Always skip auxilliary files from the report continue if fileReport[dataArg.io]: @@ -266,6 +268,8 @@ class trfJobReport(trfReport): # Now add information about output files for dataArg in self._trf._dataDictionary.values(): + if isinstance(dataArg, list): # Always skip lists from the report (auxiliary files) + continue if dataArg.io == 'output': for fileEltree in trfFileReport(dataArg).classicEltreeList(fast = fast): trfTree.append(fileEltree) -- GitLab
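Reviewer's note (not part of the patch): the core mechanism above is the switch of the minbias HITS options to argparse's action='append' on top of the existing nargs='+', so that each repetition of an option produces its own list of files. trfArgParser._parse_list_helper then keeps those lists separate, and athenaExecutor indexes them with executorStep, raising a ValueError when the number of lists does not match totalExecutorSteps. The following minimal, self-contained sketch only illustrates the underlying argparse behaviour that the patch relies on; the option name is taken from the patch, but the parser instance and file names are hypothetical stand-ins.

import argparse

# Stripped-down stand-in for the transform's parser: nargs='+' collects the
# values of one option occurrence into a list, and action='append' appends
# one such list per occurrence.
parser = argparse.ArgumentParser()
parser.add_argument('--inputLowPtMinbiasHitsFile', nargs='+', action='append')

ns = parser.parse_args([
    '--inputLowPtMinbiasHitsFile', 'HITS.presampling.step0_a.pool.root',
                                   'HITS.presampling.step0_b.pool.root',
    '--inputLowPtMinbiasHitsFile', 'HITS.presampling.step1_a.pool.root',
])

# One inner list per option occurrence:
# [['HITS.presampling.step0_a.pool.root', 'HITS.presampling.step0_b.pool.root'],
#  ['HITS.presampling.step1_a.pool.root']]
print(ns.inputLowPtMinbiasHitsFile)

In the transform itself the inner lists hold argHITSFile objects rather than bare strings, and trfExe.py pairs the i-th list with the i-th executor substep (rejecting a count mismatch with a ValueError), which is what the repeated --inputHighPtMinbiasHitsFile and --inputLowPtMinbiasHitsFile options in the updated ART test rely on.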