From cf5a4c7980ca9e55c555dce1a6bbb0ddd271e135 Mon Sep 17 00:00:00 2001 From: Marcelo Vogel <mavogel@cern.ch> Date: Wed, 5 Sep 2018 14:01:14 +0200 Subject: [PATCH] Implementation of new feature for Archive_tf (ATLASJT-383) As requested by ADC, the archiving transform now deletes input files right after they are added to the output zip archive. This commit also drops the support for handling tar archives, which was approved. Former-commit-id: 0f5c40b9c66e05d0b1a5aabccf6b0a0851666fb5 --- Tools/PyJobTransforms/python/trfExe.py | 77 ++++----------------- Tools/PyJobTransforms/scripts/Archive_tf.py | 6 -- 2 files changed, 14 insertions(+), 69 deletions(-) diff --git a/Tools/PyJobTransforms/python/trfExe.py b/Tools/PyJobTransforms/python/trfExe.py index f8920425c94fb..311211946fb44 100755 --- a/Tools/PyJobTransforms/python/trfExe.py +++ b/Tools/PyJobTransforms/python/trfExe.py @@ -1869,11 +1869,8 @@ class archiveExecutor(scriptExecutor): self.setPreExeStart() self._memMonitor = False - if 'exe' in self.conf.argdict: - self._exe = self.conf.argdict['exe'] - #unpack archived inputs - import tarfile, zipfile + import zipfile if 'inputDataFile' in self.conf.argdict: for f in self.conf.argdict['inputDataFile'].value: if zipfile.is_zipfile(f): @@ -1881,57 +1878,9 @@ class archiveExecutor(scriptExecutor): print 'Extracting input zip file {0} to temporary directory {1}'.format(f,'tmp') archive.extractall('tmp') archive.close() - elif tarfile.is_tarfile(f): - archive = tarfile.open(f, 'r:*') - print 'Extracting input tar file {0} to temporary directory {1}'.format(f,'tmp') - archive.extractall('tmp') - archive.close() - - #proceed to archive - if self._exe == 'tar': - #this is needed to keep the transform from scheduling two sub-steps - if 'outputArchFile' not in self.conf.argdict: - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_ARG_MISSING'), 'Missing output file name') - - self._cmd = ['python'] - try: - with open('tar_wrapper.py', 'w') as tar_wrapper: - print >> tar_wrapper, "import zipfile, tarfile, os, shutil" - if os.path.exists(self.conf.argdict['outputArchFile'].value[0]): - #appending input file(s) to existing archive. Compressed writing in append mode is not possible - print >> tar_wrapper, "tar = tarfile.open('{}', 'a:')".format(self.conf.argdict['outputArchFile'].value[0]) - else: - #creating new archive - if 'compressionType' in self.conf.argdict: - if self.conf.argdict['compressionType'] == 'gzip': - print >> tar_wrapper, "tar = tarfile.open('{}', 'w:gz')".format(self.conf.argdict['outputArchFile'].value[0]) - elif self.conf.argdict['compressionType'] == 'bzip2': - print >> tar_wrapper, "tar = tarfile.open('{}', 'w:bz2')".format(self.conf.argdict['outputArchFile'].value[0]) - elif self.conf.argdict['compressionType'] == 'none': - print >> tar_wrapper, "tar = tarfile.open('{}', 'w:')".format(self.conf.argdict['outputArchFile'].value[0]) - print >> tar_wrapper, "for f in {}:".format(self.conf.argdict['inputDataFile'].value) - print >> tar_wrapper, " if not zipfile.is_zipfile(f) and not tarfile.is_tarfile(f):" - print >> tar_wrapper, " print 'Tarring {}'.format(os.path.basename(f))" - print >> tar_wrapper, " tar.add(f)" - print >> tar_wrapper, "if os.path.isdir('tmp'):" - print >> tar_wrapper, " for root, dirs, files in os.walk('tmp'):" - print >> tar_wrapper, " for name in files:" - print >> tar_wrapper, " print 'Tarring {}'.format(name)" - print >> tar_wrapper, " tar.add(os.path.join(root, name),name)" - print >> tar_wrapper, " shutil.rmtree('tmp')" - print >> tar_wrapper, "tar.close()" - os.chmod('tar_wrapper.py', 0755) - except (IOError, OSError) as e: - errMsg = 'error writing tar wrapper {fileName}: {error}'.format(fileName = 'tar_wrapper.py', - error = e - ) - msg.error(errMsg) - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'), - errMsg - ) - self._cmd.append('tar_wrapper.py') - elif self._exe == 'zip': + #archiving + if self._exe == 'zip': if 'outputArchFile' not in self.conf.argdict: raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_ARG_MISSING'), 'Missing output file name') @@ -1946,9 +1895,12 @@ class archiveExecutor(scriptExecutor): #creating new archive print >> zip_wrapper, "zf = zipfile.ZipFile('{}', mode='w', allowZip64=True)".format(self.conf.argdict['outputArchFile'].value[0]) print >> zip_wrapper, "for f in {}:".format(self.conf.argdict['inputDataFile'].value) - print >> zip_wrapper, " if not zipfile.is_zipfile(f) and not tarfile.is_tarfile(f):" + print >> zip_wrapper, " if not zipfile.is_zipfile(f):" print >> zip_wrapper, " print 'Zipping {}'.format(os.path.basename(f))" print >> zip_wrapper, " zf.write(f, arcname=os.path.basename(f), compress_type=zipfile.ZIP_STORED)" + print >> zip_wrapper, " if os.access(f, os.F_OK):" + print >> zip_wrapper, " print 'Removing input file {}'.format(f)" + print >> zip_wrapper, " os.unlink(f)" print >> zip_wrapper, "if os.path.isdir('tmp'):" print >> zip_wrapper, " for root, dirs, files in os.walk('tmp'):" print >> zip_wrapper, " for name in files:" @@ -1967,19 +1919,18 @@ class archiveExecutor(scriptExecutor): ) self._cmd.append('zip_wrapper.py') + #unarchiving elif self._exe == 'unarchive': - if not zipfile.is_zipfile(self.conf.argdict['inputArchFile'].value[0]) and not tarfile.is_tarfile(self.conf.argdict['inputArchFile'].value[0]): - raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), - 'The input file is not a zip or tar archive - aborting unpacking') + for infile in self.conf.argdict['inputArchFile'].value: + if not zipfile.is_zipfile(infile): + raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'), + 'An input file is not a zip archive - aborting unpacking') self._cmd = ['python'] try: with open('unarchive_wrapper.py', 'w') as unarchive_wrapper: - print >> unarchive_wrapper, "import zipfile, tarfile" + print >> unarchive_wrapper, "import zipfile" print >> unarchive_wrapper, "for f in {}:".format(self.conf.argdict['inputArchFile'].value) - print >> unarchive_wrapper, " if zipfile.is_zipfile(f):" - print >> unarchive_wrapper, " archive = zipfile.ZipFile(f, mode='r')" - print >> unarchive_wrapper, " elif tarfile.is_tarfile(f):" - print >> unarchive_wrapper, " archive = tarfile.open(f, 'r:*')" + print >> unarchive_wrapper, " archive = zipfile.ZipFile(f, mode='r')" print >> unarchive_wrapper, " path = '{}'".format(self.conf.argdict['path']) print >> unarchive_wrapper, " print 'Extracting archive {0} to {1}'.format(f,path)" print >> unarchive_wrapper, " archive.extractall(path)" diff --git a/Tools/PyJobTransforms/scripts/Archive_tf.py b/Tools/PyJobTransforms/scripts/Archive_tf.py index 2798ca5be0863..83753708fc520 100755 --- a/Tools/PyJobTransforms/scripts/Archive_tf.py +++ b/Tools/PyJobTransforms/scripts/Archive_tf.py @@ -49,9 +49,6 @@ def addMyArgs(parser): # Use arggroup to get these arguments in their own sub-section (of --help) parser.defineArgGroup('Archiver', 'Options') parser.defineArgGroup('Unarchiver', 'Options') - parser.defineArgGroup('Tar archiver', 'Options') - parser.add_argument('--exe', group='Archiver', - help='Archiving command, default is zip', choices=['zip', 'tar']) parser.add_argument('--inputDataFile', '--inputFile', nargs='+', type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input', type='misc'), help='Input file(s)', group='Archiver') @@ -63,9 +60,6 @@ def addMyArgs(parser): parser.add_argument('--outputArchFile', '--outputFile', type=trfArgClasses.argFactory(trfArgClasses.argFile, io='output', type='misc'), help='Output archive file', group='Archiver') - parser.add_argument('--compressionType', group='Tar archiver', - help='Underlying compression type of tar. The default is none', choices=['gzip', 'bzip2', 'none'], - default='none') if __name__ == '__main__': main() -- GitLab