Skip to content
Snippets Groups Projects

New, better fix for BMK-247 (undo and replace the previous fix)

Merged Andrea Valassi requested to merge valassi/hep-workloads:BMK-247c into qa
1 file
+ 22
23
Compare changes
  • Side-by-side
  • Inline
+ 22
23
@@ -258,6 +258,7 @@ else
exit 1 # early termination (cannot start processing)
fi
fi
baseWDir=$(cd $baseWDir; pwd)
# Dump all relevant variables after validating the input arguments
# Keep a copy on a separate log too for parser tests on previous logs
@@ -285,39 +286,41 @@ echo -e "[$bmkDriver] APP=${APP}\n"
# doOneWrapper: run one copy of doOne inside its own working directory and log.
# Arguments: $1 - identifier of this copy (must be non-empty; it is passed to
#                 'seq' below, so it is presumably a positive integer - confirm
#                 against the caller, which is not visible here).
#            A second argument is rejected as an internal error.
# Globals:   bmkDriver (read, used in the error message); HOME (overwritten and exported).
# Outputs:   progress messages on stdout; most are also appended to
#            proc_$1/doOneWrapper_$1.log under the current directory.
# Returns:   0 if $status is "0", 1 otherwise; 1 also if the working directory
#            or the logfile cannot be created.
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# some statements appear twice in slightly different forms (e.g. the two 'log='
# assignments and the paired echo lines) - confirm which variant is live.
function doOneWrapper(){
# Exactly one argument is expected
if [ "$1" == "" ] || [ "$2" != "" ]; then
echo -e "[$bmkDriver] ERROR! Invalid arguments '$@' to doOneWrapper" # internal error (inconsistent code)
exit 1;
return 1 # NB: return or exit are equivalent here because doOneWrapper is executed as a subprocess
fi
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 created"
# Poll every 10ms for as long as the file redLightBMK247 exists in the current directory
while [ -f redLightBMK247 ]; do sleep 0.01; done # wait until all subprocesses have been created (fix BMK-247)
# One 0.1s sleep per value produced by 'seq $1', i.e. a total delay of $1 x 100ms
for i in $(seq $1); do sleep 0.1; done # stagger subprocess start by 100ms
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 started"
###sleep 5 # this is not needed if the list of jobs is compiled from all '$!'
workDir=$(pwd)/proc_$1 # current directory is $baseWDir here
log=${workDir}/doOneWrapper_$1.log
echo -e "[doOneWrapper ($1)] working directory : ${workDir}"
echo -e "[doOneWrapper ($1)] log : $log"
echo -e "[doOneWrapper ($1)] workdir is ${workDir}"
# Create the per-process working directory and move into it; give up if either step fails
if ! mkdir -p $workDir || ! cd $workDir; then
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create workdir)\n"
return 1
fi
log=${workDir}/doOneWrapper_$1.log
echo -e "[doOneWrapper ($1)] logfile is $log"
# Make sure the logfile exists before anything is piped to 'tee -a'
if ! touch $log ; then
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed (cannot create logfile)\n"
return 1
fi
echo -e "[doOneWrapper ($1)] $(date) : process $1 configured" 2>&1 | tee -a $log # configured means that log exists
# Point HOME at a private directory inside the working directory
mkdir $workDir/HOME
export HOME=$workDir/HOME # avoid writing to /root in read-only docker or to host HOME in singularity (BMK-166)
echo -e "[doOneWrapper ($1)] HOME=$HOME" 2>&1 | tee -a $log
echo -e "[doOneWrapper ($1)] $(date) : process $1 configured" 2>&1 | tee -a $log
# Resolve the /proc/self symlink with 'cd -P' so that the basename of $PWD is the pid;
# the value is read inside a pipeline subshell, so it is only written to the log here
cd -P /proc/self && basename $PWD | ( read thispid; \
echo -e "[doOneWrapper ($1)] current process pid is $thispid" 2>&1 | tee -a $log ) # see https://stackoverflow.com/a/15170225
# Go back to the previous directory, discarding the path that 'cd -' prints
cd - > /dev/null
# Recover the pid by parsing the line that was just appended to the log
local pid=$(cat $log | grep "current process pid is" | sed -e "s/.*current process pid is //")
local parsertest=0 # hardcoded: 0 => doOne (default); 1 => test the parser on old logs and bypass doOne (BMK-152)
if [ $parsertest -eq 0 ]; then
# Docker is detected as: running as root AND the second '/'-separated field of /proc/self/cgroup contains "docker"
if [ "$(whoami)" == "root" ] && cat /proc/self/cgroup | cut -d/ -f2 | grep docker > /dev/null; then
echo -e "[doOneWrapper ($1)] $(date) : inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log
echo -e "[doOneWrapper ($1)] inside docker - run doOne as bmkuser\n" 2>&1 | tee -a $log
# Export the doOne function so that the bash started by 'su' below can call it
export -f doOne
# Hand the working directory to bmkuser for the duration of doOne, then give it back to root
chown -R bmkuser:bmkuser $workDir 2>&1 | tee -a $log
su bmkuser -s /bin/bash -c "doOne $1" 2>&1 | tee -a $log
local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee!
chown -R root:root $workDir 2>&1 | tee -a $log
else
echo -e "[doOneWrapper ($1)] $(date) : not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log
echo -e "[doOneWrapper ($1)] not inside docker - run doOne as $(whoami)\n" 2>&1 | tee -a $log
doOne $1 2>&1 | tee -a $log
local status=${PIPESTATUS[0]} # NB do not use $? if you pipe to tee!
fi
# NOTE(review): the diff hunk boundary below hides some lines of this function
# (presumably the start of the parsertest != 0 branch) - confirm in the full file
@@ -328,10 +331,10 @@ function doOneWrapper(){
echo -e "[doOneWrapper ($1)] DUMMY doOne: copy old logs for parser tests (BMK-152)"
fi
# Report the outcome in the log and turn $status into this function's return code
if [ "$status" == "0" ]; then
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 completed ok\n" 2>&1 | tee -a $log
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) completed ok\n" 2>&1 | tee -a $log
return 0
else
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 failed\n" 2>&1 | tee -a $log
echo -e "\n[doOneWrapper ($1)] $(date) : process $1 (pid=$pid) failed\n" 2>&1 | tee -a $log
return 1
fi
}
@@ -344,9 +347,6 @@ done
# Spawn doOne subprocesses (unless validateInputArguments failed)
if [ $fail -eq 0 ]; then
# Create a semaphore for subprocesses (fix BMK-247)
touch $baseWDir/redLightBMK247
# Spawn subprocesses (and keep track of the list of their pids using '$!')
echo -e "------------------------------------------------------------------------"
echo -e "[$bmkDriver] spawn $NCOPIES processes"
@@ -357,19 +357,18 @@ if [ $fail -eq 0 ]; then
ipid=$!
[ $DEBUG -gt 0 ] && echo -e "[$bmkDriver] spawned process $i with pid $ipid"
jobs="$jobs $ipid"
sleep 0.1 # stagger subprocess creation by 100ms
sleep 0.1 # stagger job creation by 100ms
done
[ $DEBUG -gt 0 ] && echo -e "\n[$bmkDriver] pids of spawned processes:$jobs\n"
# Give the green light for all subprocesses to start (fix BMK-247)
rm -f $baseWDir/redLightBMK247
# Wait for all subprocesses to complete and check their exit codes
# [NB: do not use 'jobs -p': some jobs may be missing if already completed]
[ $DEBUG -gt 0 ] && echo -e "\n[$bmkDriver] $(date) ... waiting for spawned processes with pid's$jobs\n"
wait $jobs > /dev/null 2>&1
fail=0 # unnecessary but harmless (this code is only executed if $fail -eq 0)
for job in ${jobs}; do # better than using $(jobs -p)
[ $DEBUG -gt 0 ] && echo -e "\n[$bmkDriver] $(date) ... waiting for spawned process pid=$job\n"
wait $job || let "fail+=1"
for i in $(seq 1 $NCOPIES); do
if [ $(cat $baseWDir/proc_$i/doOneWrapper_$i.log | grep "[doOneWrapper ($i)]" | grep "completed ok" | wc -l) -ne 1 ]; then
let "fail+=1"
fi
done
echo -e "\n------------------------------------------------------------------------"
if [ $fail -gt 0 ]; then
Loading