From 883f385ff87d248fd1ede52fa2aec6a2e696d1e6 Mon Sep 17 00:00:00 2001
From: Mark Stockton <mark@cern.ch>
Date: Tue, 5 Nov 2019 18:13:49 +0100
Subject: [PATCH] Add log file checking to find failed child jobs and set
 the mother return code to that result. Tested by forcibly killing a child
 process.

---
 .../TrigTransform/python/trigRecoExe.py       | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/HLT/Trigger/TrigTransforms/TrigTransform/python/trigRecoExe.py b/HLT/Trigger/TrigTransforms/TrigTransform/python/trigRecoExe.py
index 62e9791b844..90060002dda 100644
--- a/HLT/Trigger/TrigTransforms/TrigTransform/python/trigRecoExe.py
+++ b/HLT/Trigger/TrigTransforms/TrigTransform/python/trigRecoExe.py
@@ -17,7 +17,7 @@ import subprocess
 from PyJobTransforms.trfExe import athenaExecutor
 
 #imports for preExecute
-from PyJobTransforms.trfUtils import asetupReport, cvmfsDBReleaseCheck
+from PyJobTransforms.trfUtils import asetupReport, cvmfsDBReleaseCheck, lineByLine
 import PyJobTransforms.trfEnv as trfEnv
 import PyJobTransforms.trfExceptions as trfExceptions
 from PyJobTransforms.trfExitCodes import trfExit as trfExit
@@ -195,8 +195,27 @@ class trigRecoExecutor(athenaExecutor):
             
     def postExecute(self):
                 
-        #TODO
-        #need to check for HLTMPPU.*Child Issue in the log file and throw an error message if there so we catch that the child died
+        # Check the log file for HLTMPPU.*Child Issue messages
+        #   Logs an error message if one is found, so we catch that the child died
+        #   Also sets the return code of the mother process to mark the job as failed
+        #   Based on trfValidation.scanLogFile
+        log = self._logFileName
+        msg.debug('Now scanning logfile {0}'.format(log))
+        # Using the generator so that lines can be grabbed by subroutines if needed for more reporting
+        try:
+            myGen = lineByLine(log, substepName=self._substep)
+        except IOError as e:
+            msg.error('Failed to open transform logfile {0}: {1:s}'.format(log, e))
+        for line, lineCounter in myGen:
+            # Check to see if any of the hlt children had an issue
+            if 'Child Issue' in line > -1:
+                try:
+                    signal = int((re.search('signal ([0-9]*)', line)).group(1))
+                except AttributeError:
+                    # no "signal <number>" text found in the line, so fall back to 0
+                    signal = 0
+                msg.error('Detected issue with HLTChild, setting mother return code to %s' % (signal) )
+                self._rc = signal
 
         msg.info("Check for expert-monitoring.root file")
         #the BS-BS step generates the files:
-- 
GitLab