From b68fb66cf7a1aebd2d2723b46e4e2601c82764c8 Mon Sep 17 00:00:00 2001
From: Marcelo Vogel <mavogel@cern.ch>
Date: Wed, 28 Jun 2017 16:36:22 +0200
Subject: [PATCH] Zip or tar multiple small outputs with Archive_tf.py
 (ATLASJT-296)

The functionality of Archive_tf.py has been expanded so that it can
not only archive files into tar format (with or without compression)
but also zip them. This functionality was requested by ADC to manage
large numbers of small outputs, which need to be stored for metadata
purposes. The extended functionality allows the small output files to
be zipped together before storage.


Former-commit-id: 24d16b6ef51f5a04ac23c3adebaeac5ff5f08dbe
---
 Tools/PyJobTransforms/python/trfExe.py      | 35 ++++++++++++++-------
 Tools/PyJobTransforms/scripts/Archive_tf.py | 17 +++++-----
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/Tools/PyJobTransforms/python/trfExe.py b/Tools/PyJobTransforms/python/trfExe.py
index f4767db61c3..b0650234262 100755
--- a/Tools/PyJobTransforms/python/trfExe.py
+++ b/Tools/PyJobTransforms/python/trfExe.py
@@ -1871,19 +1871,30 @@ class tagMergeExecutor(scriptExecutor):
 ## @brief Archive transform - use tar
 class archiveExecutor(scriptExecutor):
 
+    def __init__(self, name = 'Archiver', exe = 'zip'):
+        super(archiveExecutor, self).__init__(name=name, exe=exe, memMonitor=False)
+
     def preExecute(self, input = set(), output = set()):
         self.setPreExeStart()
-        # Set the correct command for execution
-        self._cmd = [self._exe, '-c', '-v',]
-        if 'compressionType' in self.conf.argdict.keys():
-            if self.conf.argdict['compressionType'] == 'gzip':
-                self._cmd.append('-z')
-            elif self.conf.argdict['compressionType'] == 'bzip2':
-                self._cmd.append('-j')
-            elif self.conf.argdict['compressionType'] == 'none':
-                pass
-        self._cmd.extend(['-f', self.conf.argdict['outputArchFile'].value[0]])
+
+        if 'exe' in self.conf.argdict:
+            self._exe = self.conf.argdict['exe']
+
+        if self._exe == 'tar':
+            self._cmd = [self._exe, '-c', '-v',]
+            self._cmd.extend(['-f', self.conf.argdict['outputArchFile'].value[0]])
+            if 'compressionType' in self.conf.argdict:
+                if self.conf.argdict['compressionType'] == 'gzip':
+                    self._cmd.append('-z')
+                elif self.conf.argdict['compressionType'] == 'bzip2':
+                    self._cmd.append('-j')
+                elif self.conf.argdict['compressionType'] == 'none':
+                    pass
+        elif self._exe == 'zip':
+            self._cmd = [self._exe]
+            self._cmd.extend([self.conf.argdict['outputArchFile'].value[0]])
+            if '.' not in self.conf.argdict['outputArchFile'].value[0]:
+                errmsg = 'Output filename must end in ".", ".zip" or ".anyname" '
+                raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'), errmsg)
         self._cmd.extend(self.conf.argdict['inputDataFile'].value)
-        
         super(archiveExecutor, self).preExecute(input=input, output=output)
-
diff --git a/Tools/PyJobTransforms/scripts/Archive_tf.py b/Tools/PyJobTransforms/scripts/Archive_tf.py
index 0bbdb88a2cc..ee55654a438 100755
--- a/Tools/PyJobTransforms/scripts/Archive_tf.py
+++ b/Tools/PyJobTransforms/scripts/Archive_tf.py
@@ -36,7 +36,7 @@ def main():
     sys.exit(trf.exitCode)
 
 def getTransform():
-    trf = transform(executor = archiveExecutor(name = 'Archiver', exe='tar'))
+    trf = transform(executor = archiveExecutor(name = 'Archiver', exe = 'zip'))
 
     addMyArgs(trf.parser)
     return trf
@@ -44,17 +44,20 @@ def getTransform():
 
 def addMyArgs(parser):
     # Use arggroup to get these arguments in their own sub-section (of --help)
-    parser.defineArgGroup('Archive_tf', 'Archive transform specific options')
+    parser.defineArgGroup('Archive_tf', 'Archive transform options')
+    parser.defineArgGroup('Tar archiver', 'Options')
+    # Do not add a default value to --exe, this is handled through the archiveExecutor
+    parser.add_argument('--exe', group='Archive_tf',
+                        help='Archiving command. Default is zip', choices=['zip', 'tar'])
     parser.add_argument('--inputDataFile', '--inputFile', nargs='+', 
                         type=trfArgClasses.argFactory(trfArgClasses.argFile, io='input', type='misc'),
                         help='Input file(s)', group='Archive_tf')
     parser.add_argument('--outputArchFile', '--outputFile', 
                         type=trfArgClasses.argFactory(trfArgClasses.argFile, io='output', type='misc'),
-                        help='Output archive file', 
-                        group='Archive_tf')
-    parser.add_argument('--compressionType', group='Archive_tf',
-                        help='Underlying compression type', choices=['gzip', 'bzip2', 'none'],
-                        default='gzip')
+                        help='Output archive file', group='Archive_tf')
+    parser.add_argument('--compressionType', group='Tar archiver',
+                        help='Underlying compression type. Default is none', choices=['gzip', 'bzip2', 'none'],
+                        default='none')
 
 if __name__ == '__main__':
     main()
-- 
GitLab