From 03159a430cb7eaec72d346e8f31a1e6d4861af49 Mon Sep 17 00:00:00 2001
From: Marcelo Vogel <mavogel@cern.ch>
Date: Thu, 7 Jun 2018 15:36:54 +0200
Subject: [PATCH] Transform support for reporting additional metadata
 (ATLASJT-375)

This branch adds a few Python modules and classes to support the
reporting of additional metadata produced in event generation. It
is also needed to fix a bug reported in AGENE-1586.


Former-commit-id: a5aed9f264b0efd1c1887253754608b2f0258d7b
---
 Tools/PyJobTransforms/python/trfArgClasses.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/Tools/PyJobTransforms/python/trfArgClasses.py b/Tools/PyJobTransforms/python/trfArgClasses.py
index b3444de0aa1..b0bd7ef7f08 100644
--- a/Tools/PyJobTransforms/python/trfArgClasses.py
+++ b/Tools/PyJobTransforms/python/trfArgClasses.py
@@ -1730,6 +1730,70 @@ class argHepEvtAsciiFile(argFile):
                 msg.error('Event count for file {0} failed: {1!s}'.format(fname, e))
                 self._fileMetadata[fname]['nentries'] = None
                 
+## @brief LHE ASCII file
+#  @details Handles gzip-compressed tarballs of LHE text; unreadable files get 'UNDEFINED' metadata values
+class argLHEFile(argFile):
+    def __init__(self, value=list(), io = 'output', type=None, splitter=',', runarg=True, multipleOK=None, name=None):
+        super(argLHEFile, self).__init__(value=value, io=io, type=type, splitter=splitter, runarg=runarg, multipleOK=multipleOK,
+                                           name=name)
+
+        self._metadataKeys.update({
+                'nentries': self._getNumberOfEvents,
+                'lheSumOfPosWeights': self._getWeightedEvents,
+                'lheSumOfNegWeights': self._getWeightedEvents,  # same scan fills both weight sums
+                })
+
+    ## @brief Set 'nentries' to the number of '/event' markers summed over all members of each tarball
+    #  @param files List of file names to examine
+    def _getNumberOfEvents(self, files):
+        msg.debug('Retrieving event count for LHE file {0}'.format(files))
+        import tarfile
+        for fname in files:
+            try:
+                lhecount = 0
+                with tarfile.open(fname, "r:gz") as tar:
+                    for member in tar.getmembers():
+                        fileTXT = tar.extractfile(member)
+                        if fileTXT is not None:
+                            lhecount += fileTXT.read().decode('utf-8', 'replace').count('/event')  # bytes on Python 3
+                self._fileMetadata[fname]['nentries'] = lhecount
+            except Exception as e:
+                msg.debug('Event count for LHE file {0} failed, set to UNDEFINED: {1!s}'.format(fname, e))
+                self._fileMetadata[fname]['nentries'] = 'UNDEFINED'
+
+    ## @brief Set 'lheSumOfPosWeights' and 'lheSumOfNegWeights' (sum of magnitudes) for each tarball
+    #  @details The weight is parsed from the line that follows each '<event' tag
+    #  @param files List of file names to examine
+    def _getWeightedEvents(self, files):
+        msg.debug('Retrieving weight count for LHE file {0}'.format(files))
+        import tarfile
+        import re
+        for fname in files:
+            weightPos = weightNeg = 0
+            try:
+                with tarfile.open(fname, "r:gz") as tar:
+                    for member in tar.getmembers():
+                        fileTXT = tar.extractfile(member)
+                        headerNext = False
+                        if fileTXT is not None:
+                            for line in fileTXT.read().decode('utf-8', 'replace').split('\n'):
+                                if headerNext:
+                                    headerNext = False
+                                    try:
+                                        # NOTE(review): a leading blank yields an empty first field, shifting index 2 - confirm column
+                                        w = float(re.sub(' +', ' ', line).split(" ")[2])
+                                        if w > 0: weightPos += w
+                                        else: weightNeg += abs(w)
+                                    except (ValueError, IndexError):
+                                        pass   # malformed header line contributes no weight
+                                if "<event" in line:
+                                    headerNext = True
+                self._fileMetadata[fname]['lheSumOfPosWeights'] = weightPos
+                self._fileMetadata[fname]['lheSumOfNegWeights'] = weightNeg
+            except Exception as e:
+                msg.debug('Weight sums for LHE file {0} failed, set to UNDEFINED: {1!s}'.format(fname, e))
+                self._fileMetadata[fname]['lheSumOfPosWeights'] = 'UNDEFINED'
+                self._fileMetadata[fname]['lheSumOfNegWeights'] = 'UNDEFINED'
 
 ## @brief Base class for substep arguments
 #  @details Sets up a dictionary with {substep1: value1, substep2: value2, ...}
-- 
GitLab