diff --git a/GaudiPolicy/doc/release.notes b/GaudiPolicy/doc/release.notes index 192eaa66f3901c3902bd91fe66f1320e36af7bef..efade62b4614a5a927d66edbe091dd9dfe2f209f 100644 --- a/GaudiPolicy/doc/release.notes +++ b/GaudiPolicy/doc/release.notes @@ -2,6 +2,13 @@ package GaudiPolicy package manager: Marco Clemencic Commit Id: $Format:%H$ +! 2015-09-24 - Marco Clemencic + - GAUDI-1084: replaced calls to Popen terminate and kill with a function that + recurse and send a signal to all the children. + +! 2015-09-18 - Marco Clemencic + - GAUDI-1084: modified test timeout handling to avoid infinite waits + ============================= GaudiPolicy v15r5 ============================== ! 2015-05-14 - Marco Clemencic - Select 'dbg' in test reference files if the platform contains 'do0'. diff --git a/GaudiPolicy/python/GaudiTesting/BaseTest.py b/GaudiPolicy/python/GaudiTesting/BaseTest.py index bd2d9c9029679082caa16cec7815c4804a051f12..57a537dcd62e47dfd78f5af0f3f729463c808242 100644 --- a/GaudiPolicy/python/GaudiTesting/BaseTest.py +++ b/GaudiPolicy/python/GaudiTesting/BaseTest.py @@ -27,6 +27,33 @@ def sanitize_for_xml(data): return ''.join('[NON-XML-CHAR-0x%2X]' % ord(c) for c in match.group()) return bad_chars.sub(quote, data) +def dumpProcs(name): + '''helper to debug GAUDI-1084, dump the list of processes''' + from getpass import getuser + if 'WORKSPACE' in os.environ: + p = Popen(['ps', '-fH', '-U', getuser()], stdout=PIPE) + with open(os.path.join(os.environ['WORKSPACE'], name), 'w') as f: + f.write(p.communicate()[0]) + +def kill_tree(ppid, sig): + ''' + Send a signal to a process and all its child processes (starting from the + leaves). + ''' + log = logging.getLogger('kill_tree') + ps_cmd = ['ps', '--no-headers', '-o', 'pid', '--ppid', str(ppid)] + get_children = Popen(ps_cmd, stdout=PIPE, stderr=PIPE) + children = map(int, get_children.communicate()[0].split()) + for child in children: + kill_tree(child, sig) + try: + log.debug('killing process %d', ppid) + os.kill(ppid, sig) + except OSError, err: + if err.errno != 3: # No such process + raise + log.debug('no such process %d', ppid) + #-------------------------------------------------------------------------# class BaseTest(object): @@ -134,25 +161,26 @@ class BaseTest(object): params, workdir) self.proc = Popen(params, stdout=PIPE, stderr=PIPE, env=self.environment) + logging.debug('(pid: %d)', self.proc.pid) self.out, self.err = self.proc.communicate() thread = threading.Thread(target=target) thread.start() - #catching timeout + # catching timeout thread.join(self.timeout) if thread.is_alive(): + logging.debug('time out in test %s (pid %d)', self.name, self.proc.pid) # get the stack trace of the stuck process - cmd = ['gdb', '-p', str(self.proc.pid), '-n', '-q'] + cmd = ['gdb', '--pid', str(self.proc.pid), '--batch', + '--eval-command=thread apply all backtrace'] gdb = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT) - self.stack_trace = gdb.communicate('set pagination off\n' - 'set confirm off\n' - 'thread apply all backtrace\n' - 'quit\n')[0] - - self.proc.send_signal(signal.SIGKILL) - logging.debug('time out in test %s', self.name) - thread.join() + self.stack_trace = gdb.communicate()[0] + + kill_tree(self.proc.pid, signal.SIGTERM) + thread.join(60) + if thread.is_alive(): + kill_tree(self.proc.pid, signal.SIGKILL) self.causes.append('timeout') else: logging.debug('completed test %s', self.name)