Skip to content
Snippets Groups Projects
Commit cf5a4c79 authored by Marcelo Vogel
Browse files

Implementation of new feature for Archive_tf (ATLASJT-383)

As requested by ADC, the archiving transform now deletes each input file
right after it has been added to the output zip archive. This commit
also drops support for handling tar archives; that removal was approved.


Former-commit-id: 0f5c40b9c66e05d0b1a5aabccf6b0a0851666fb5
parent 9e79ce74
No related branches found
No related tags found
No related merge requests found
......@@ -1869,11 +1869,8 @@ class archiveExecutor(scriptExecutor):
self.setPreExeStart()
self._memMonitor = False
if 'exe' in self.conf.argdict:
self._exe = self.conf.argdict['exe']
#unpack archived inputs
import tarfile, zipfile
import zipfile
if 'inputDataFile' in self.conf.argdict:
for f in self.conf.argdict['inputDataFile'].value:
if zipfile.is_zipfile(f):
......@@ -1881,57 +1878,9 @@ class archiveExecutor(scriptExecutor):
print 'Extracting input zip file {0} to temporary directory {1}'.format(f,'tmp')
archive.extractall('tmp')
archive.close()
elif tarfile.is_tarfile(f):
archive = tarfile.open(f, 'r:*')
print 'Extracting input tar file {0} to temporary directory {1}'.format(f,'tmp')
archive.extractall('tmp')
archive.close()
#proceed to archive
if self._exe == 'tar':
#this is needed to keep the transform from scheduling two sub-steps
if 'outputArchFile' not in self.conf.argdict:
raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_ARG_MISSING'), 'Missing output file name')
self._cmd = ['python']
try:
with open('tar_wrapper.py', 'w') as tar_wrapper:
print >> tar_wrapper, "import zipfile, tarfile, os, shutil"
if os.path.exists(self.conf.argdict['outputArchFile'].value[0]):
#appending input file(s) to existing archive. Compressed writing in append mode is not possible
print >> tar_wrapper, "tar = tarfile.open('{}', 'a:')".format(self.conf.argdict['outputArchFile'].value[0])
else:
#creating new archive
if 'compressionType' in self.conf.argdict:
if self.conf.argdict['compressionType'] == 'gzip':
print >> tar_wrapper, "tar = tarfile.open('{}', 'w:gz')".format(self.conf.argdict['outputArchFile'].value[0])
elif self.conf.argdict['compressionType'] == 'bzip2':
print >> tar_wrapper, "tar = tarfile.open('{}', 'w:bz2')".format(self.conf.argdict['outputArchFile'].value[0])
elif self.conf.argdict['compressionType'] == 'none':
print >> tar_wrapper, "tar = tarfile.open('{}', 'w:')".format(self.conf.argdict['outputArchFile'].value[0])
print >> tar_wrapper, "for f in {}:".format(self.conf.argdict['inputDataFile'].value)
print >> tar_wrapper, " if not zipfile.is_zipfile(f) and not tarfile.is_tarfile(f):"
print >> tar_wrapper, " print 'Tarring {}'.format(os.path.basename(f))"
print >> tar_wrapper, " tar.add(f)"
print >> tar_wrapper, "if os.path.isdir('tmp'):"
print >> tar_wrapper, " for root, dirs, files in os.walk('tmp'):"
print >> tar_wrapper, " for name in files:"
print >> tar_wrapper, " print 'Tarring {}'.format(name)"
print >> tar_wrapper, " tar.add(os.path.join(root, name),name)"
print >> tar_wrapper, " shutil.rmtree('tmp')"
print >> tar_wrapper, "tar.close()"
os.chmod('tar_wrapper.py', 0755)
except (IOError, OSError) as e:
errMsg = 'error writing tar wrapper {fileName}: {error}'.format(fileName = 'tar_wrapper.py',
error = e
)
msg.error(errMsg)
raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_WRAPPER'),
errMsg
)
self._cmd.append('tar_wrapper.py')
elif self._exe == 'zip':
#archiving
if self._exe == 'zip':
if 'outputArchFile' not in self.conf.argdict:
raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_ARG_MISSING'), 'Missing output file name')
......@@ -1946,9 +1895,12 @@ class archiveExecutor(scriptExecutor):
#creating new archive
print >> zip_wrapper, "zf = zipfile.ZipFile('{}', mode='w', allowZip64=True)".format(self.conf.argdict['outputArchFile'].value[0])
print >> zip_wrapper, "for f in {}:".format(self.conf.argdict['inputDataFile'].value)
print >> zip_wrapper, " if not zipfile.is_zipfile(f) and not tarfile.is_tarfile(f):"
print >> zip_wrapper, " if not zipfile.is_zipfile(f):"
print >> zip_wrapper, " print 'Zipping {}'.format(os.path.basename(f))"
print >> zip_wrapper, " zf.write(f, arcname=os.path.basename(f), compress_type=zipfile.ZIP_STORED)"
print >> zip_wrapper, " if os.access(f, os.F_OK):"
print >> zip_wrapper, " print 'Removing input file {}'.format(f)"
print >> zip_wrapper, " os.unlink(f)"
print >> zip_wrapper, "if os.path.isdir('tmp'):"
print >> zip_wrapper, " for root, dirs, files in os.walk('tmp'):"
print >> zip_wrapper, " for name in files:"
......@@ -1967,19 +1919,18 @@ class archiveExecutor(scriptExecutor):
)
self._cmd.append('zip_wrapper.py')
#unarchiving
elif self._exe == 'unarchive':
if not zipfile.is_zipfile(self.conf.argdict['inputArchFile'].value[0]) and not tarfile.is_tarfile(self.conf.argdict['inputArchFile'].value[0]):
raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'),
'The input file is not a zip or tar archive - aborting unpacking')
for infile in self.conf.argdict['inputArchFile'].value:
if not zipfile.is_zipfile(infile):
raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_INPUT_FILE_ERROR'),
'An input file is not a zip archive - aborting unpacking')
self._cmd = ['python']
try:
with open('unarchive_wrapper.py', 'w') as unarchive_wrapper:
print >> unarchive_wrapper, "import zipfile, tarfile"
print >> unarchive_wrapper, "import zipfile"
print >> unarchive_wrapper, "for f in {}:".format(self.conf.argdict['inputArchFile'].value)
print >> unarchive_wrapper, " if zipfile.is_zipfile(f):"
print >> unarchive_wrapper, " archive = zipfile.ZipFile(f, mode='r')"
print >> unarchive_wrapper, " elif tarfile.is_tarfile(f):"
print >> unarchive_wrapper, " archive = tarfile.open(f, 'r:*')"
print >> unarchive_wrapper, " archive = zipfile.ZipFile(f, mode='r')"
print >> unarchive_wrapper, " path = '{}'".format(self.conf.argdict['path'])
print >> unarchive_wrapper, " print 'Extracting archive {0} to {1}'.format(f,path)"
print >> unarchive_wrapper, " archive.extractall(path)"
......
......@@ -49,9 +49,6 @@ def addMyArgs(parser):
# Use arggroup to get these arguments in their own sub-section (of --help)
parser.defineArgGroup('Archiver', 'Options')
parser.defineArgGroup('Unarchiver', 'Options')
parser.defineArgGroup('Tar archiver', 'Options')
parser.add_argument('--exe', group='Archiver',
help='Archiving command, default is zip', choices=['zip', 'tar'])
parser.add_argument('--inputDataFile', '--inputFile', nargs='+',
type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input', type='misc'),
help='Input file(s)', group='Archiver')
......@@ -63,9 +60,6 @@ def addMyArgs(parser):
parser.add_argument('--outputArchFile', '--outputFile',
type=trfArgClasses.argFactory(trfArgClasses.argFile, io='output', type='misc'),
help='Output archive file', group='Archiver')
parser.add_argument('--compressionType', group='Tar archiver',
help='Underlying compression type of tar. The default is none', choices=['gzip', 'bzip2', 'none'],
default='none')
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment