Commit 41e26c08 authored by Brian Paul Bockelman
Browse files

Convert the xrootd-access and xrootd-fallback tests to python.

Includes a hard timeout for the test, after which we will SIGKILL cmsRun.
parent 288ab807
#! /bin/bash
# Write one progress line to stdout, prefixed with the output of `date`.
#   $1 - message text
add2buffer() {
  printf '%s\n' "$(date): $1"
}
# Write the test's summary line ("summary: <text>") to stdout.
#   $1 - summary text
do_print() {
  printf 'summary: %s\n' "$1"
}
# Change to test directory (quoted so a script path containing spaces works)
cd -- "$(dirname -- "$0")"

# parse arguments
#   -v <value>  stored in $verbosity (defaults to "1")
#   -H <value>  stored in $host
#   -t <value>  stored in $timeout
verbosity="1"
while getopts "v:H:t:" flag; do
  case "$flag" in
    v) verbosity=$OPTARG ;;
    H) host=$OPTARG ;;
    t) timeout=$OPTARG ;;
  esac
done

# For now, never give failures - just warnings.
# Defaults (1 for warning, 0 for OK) are used when SAME_WARNING / SAME_OK are
# not set: an empty NAG_CRITICAL would turn `exit $NAG_CRITICAL` into a bare
# `exit`, which returns the status of the previous command (0 after a
# successful echo) and so would report a failed test as a success.
NAG_CRITICAL=${SAME_WARNING:-1}
NAG_WARNING=${SAME_WARNING:-1}
NAG_OK=${SAME_OK:-0}

# Default to successful job
exitcode=$NAG_OK
# Print environment information
now="$(date -u +'%F %T') UTC"
currdir=$PWD
# NOTE: this overwrites whatever the -H option stored in $host.
host=$(uname -n)
pilotid=$(/usr/bin/id)
pilotidu=$(/usr/bin/id -u)
# Numeric group ids, sorted ascending, joined by spaces
pilotidg=$(id -G | tr ' ' '\n' | sort -n | tr '\n' ' ')
add2buffer "Ran at $now on host $host, directory $currdir, as user:"
add2buffer "$pilotid"

# Check that $X509_USER_PROXY points to an existing file
if [ -z "$X509_USER_PROXY" ]; then
  do_print "Error: X509_USER_PROXY is not defined"
  exit $NAG_CRITICAL
fi
if [ -f "$X509_USER_PROXY" ]; then
  add2buffer "X509_USER_PROXY=$X509_USER_PROXY"
else
  do_print "Error: X509_USER_PROXY points to a non existing location"
  exit $NAG_CRITICAL
fi

# workaround to suppress voms errors on OSG
export VOMS_PROXY_INFO_DONT_VERIFY_AC="1"

# Setup the grid environment:
# $OSG_GRID is quoted so a path containing whitespace or glob characters is
# tested and sourced as a single word.
if [ -n "$OSG_GRID" ]; then
  if [ -f "$OSG_GRID/setup.sh" ]; then
    source "$OSG_GRID/setup.sh"
  fi
fi

# Report the proxy identity and the first FQAN listed by voms-proxy-info
dn=$(voms-proxy-info --identity)
fqan=$(voms-proxy-info --fqan | head -1)
add2buffer "DN: $dn"
add2buffer "Primary FQAN: $fqan"
# Set the CMS environment
# The software directory is taken from the first of these that is available:
# $VO_CMS_SW_DIR, $OSG_APP, $CVMFS, then the fixed /cvmfs/cms.cern.ch path.
if [ -n "$VO_CMS_SW_DIR" ]; then
  SW_DIR=$VO_CMS_SW_DIR
  add2buffer "VO_CMS_SW_DIR=$VO_CMS_SW_DIR"
elif [ -n "$OSG_APP" ]; then
  SW_DIR=$OSG_APP/cmssoft/cms
  add2buffer "OSG_APP/cmssoft/cms=$OSG_APP/cmssoft/cms"
elif [ -n "$CVMFS" ]; then
  SW_DIR=$CVMFS/cms.cern.ch
  add2buffer "CVMFS (via env)=$CVMFS/cms.cern.ch"
elif [ -e "/cvmfs/cms.cern.ch" ]; then
  SW_DIR=/cvmfs/cms.cern.ch
  add2buffer "CVMFS=/cvmfs/cms.cern.ch"
else
  do_print "Error: None of OSG_APP, VO_CMS_SW_DIR, or CVMFS are present."
  exit $NAG_CRITICAL
fi

if [ ! -f "$SW_DIR/cmsset_default.sh" ]; then
  do_print "Error: cmssw setup file $SW_DIR/cmsset_default.sh not existing"
  exit $NAG_CRITICAL
fi
add2buffer "CMS configuration file: $SW_DIR/cmsset_default.sh"

export SCRAM_ARCH=slc5_amd64_gcc462
source "$SW_DIR/cmsset_default.sh"
err=$?
if [ "$err" != 0 ]; then
  do_print "Error: CMS software initialisation script cmsset_default.sh failed"
  exit $NAG_CRITICAL
fi

# Quoted so the tests behave predictably when CMS_PATH is empty or contains
# whitespace.
if [ -z "$CMS_PATH" ]; then
  do_print "Error: CMS_PATH not defined"
  exit $NAG_CRITICAL
fi
if [ ! -d "$CMS_PATH" ]; then
  do_print "Error: CMS_PATH directory $CMS_PATH not existing"
  exit $NAG_CRITICAL
fi

# Parse the local config file and find site name
if [ ! -d "$CMS_PATH/SITECONF/local/JobConfig" ]; then
  do_print "Error: JobConfig directory $CMS_PATH/SITECONF/local/JobConfig not existing"
  exit $NAG_CRITICAL
fi
ConfigFile=${CMS_PATH}/SITECONF/local/JobConfig/site-local-config.xml
if [ ! -f "$ConfigFile" ]; then
  do_print "Error: Local Configuration file site-local-config.xml not existing"
  exit $NAG_CRITICAL
fi
add2buffer "Local configuration file: $ConfigFile"

grep -q "site name" "$ConfigFile"
err=$?
if [ "$err" != 0 ]; then
  do_print "Error: site name string missing in config file"
  exit $NAG_CRITICAL
fi
# Second double-quote-delimited field of the "site name" line(s), skipping
# any "subsite name" line.
siteName=$(grep "site name" "$ConfigFile" | grep -v "subsite name" | cut -d'"' -f2)
add2buffer "Site name: $siteName"

cmssw_file="/store/mc/SAM/GenericTTbar/GEN-SIM-RECO/CMSSW_5_3_1_START53_V5-v1/0013/CE4D66EB-5AAE-E111-96D6-003048D37524.root"
xrootd_file="root://cms-xrd-global.cern.ch//store/test/xrootd/$siteName/$cmssw_file"
add2buffer "Xrootd file we will test: $xrootd_file"
cmssw_version="CMSSW_5_3_1"

# Create the scratch directory and a release area inside it
if ! mkdir xrootd-access; then
  do_print "Error: cannot make xrootd-access directory"
  exit $NAG_CRITICAL
fi
# Stop here if the directory cannot be entered, so the release area is never
# created in the wrong place.
if ! cd xrootd-access; then
  do_print "Error: cannot cd into xrootd-access directory"
  exit $NAG_CRITICAL
fi
scramv1 p CMSSW "$cmssw_version"
err=$?
if [ "$err" != 0 ]; then
  do_print "Error: cannot make $cmssw_version release area (SCRAM_ARCH=$SCRAM_ARCH)"
  exit $NAG_CRITICAL
fi
if ! cd "$cmssw_version"; then
  do_print "Error: cannot cd into $cmssw_version directory"
  exit $NAG_CRITICAL
fi
cat > test_xrootd.py << EOF
#!/usr/bin/env python
import os
import sys
import time
import errno
import fcntl
import select
import signal
import socket
import logging
import optparse
log = logging.getLogger()
cms_file = """
import FWCore.ParameterSet.Config as cms
process = cms.Process('XrootdTest')
process.source = cms.Source('PoolSource',
fileNames = cms.untracked.vstring("$xrootd_file"),
fileNames = cms.untracked.vstring("%s"),
)
process.SiteLocalConfigService = cms.Service("SiteLocalConfigService",
......@@ -176,27 +33,301 @@ process.maxEvents = cms.untracked.PSet(
)
process.p = cms.Path(process.dump)
EOF
eval `scramv1 runtime -sh`
err=$?
if [ $err != 0 ] ; then
do_print "Error $err: Cannot setup $cmssw_version environment (SCRAM_ARCH=$SCRAM_ARCH)"
exit $NAG_CRITICAL
fi
cmsRun test_xrootd.py
err=$?
if [ $err -ne 0 ]; then
do_print "cmsRun failed."
exit $NAG_CRITICAL
fi
# exit
if [ $exitcode -ne $NAG_OK ]; then
do_print "Warning: execution contains warnings"
else
do_print "Success"
fi
exit $exitcode
"""
def configure_logging(lvl=logging.INFO):
    """Set up the "cms.CE.xrootd-access" logger and publish it as ``log``.

    lvl -- threshold applied to the logger; defaults to logging.INFO.

    A stream handler writing to sys.stdout is added to the logger and the
    module-level name ``log`` is rebound to it.
    """
    global log
    stream = logging.StreamHandler(sys.stdout)
    stream.setFormatter(logging.Formatter(
        fmt="[%(process)d] %(asctime)s [%(levelname)07s]: %(message)s"))
    log = logging.getLogger("cms.CE.xrootd-access")
    log.setLevel(lvl)
    log.addHandler(stream)
def print_summary(summary, retval):
    """Log and print the one-line test summary, then hand back ``retval``.

    summary -- text of the summary.
    retval  -- status the caller is about to return; 0 is logged at INFO
               level, any other value at ERROR level.

    Returns ``retval`` unchanged, so callers can write
    ``return print_summary(...)``.
    """
    if retval != 0:
        log.error(summary)
    else:
        log.info(summary)
    # A single parenthesised argument prints the same text whether print is
    # a statement or a function.
    print("summary: %s" % summary)
    return retval
def parse_opts():
    """Parse the command line and configure logging.

    Options: -v/--verbose (flag), -H/--host (stored as ``hostname``) and
    -t/--timeout (integer seconds, default 240).

    The returned options object additionally carries ``starttime`` (the
    time of this call) and ``endtime`` (``starttime`` plus the timeout).

    Returns (args, opts): the positional arguments first, then the options.
    """
    parser = optparse.OptionParser()
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=False, help="Increase logging verbosity")
    parser.add_option("-H", "--host", dest="hostname", help="Hostname to use")
    parser.add_option("-t", "--timeout", type="int", dest="timeout", default=240,
                      help="Test timeout in seconds; default is 240")
    opts, args = parser.parse_args()

    opts.starttime = time.time()
    opts.endtime = opts.starttime + opts.timeout

    # logging.INFO is also configure_logging's own default level.
    level = logging.INFO
    if opts.verbose:
        level = logging.DEBUG
    configure_logging(level)
    return args, opts
def getExitCode(name, default):
    """Return the integer exit code held in environment variable ``name``.

    name    -- environment variable to read (for example "SAME_WARNING").
    default -- value used when the variable is unset, and also returned
               as-is when the stored text is not a valid integer.

    A malformed value previously produced 0, which made a critical failure
    indistinguishable from success; it now falls back to ``default``.
    """
    try:
        return int(os.environ.get(name, default))
    except (TypeError, ValueError):
        return default
def runCommandChild(cmd, args):
    """Replace the current process with ``cmd``; never returns.

    cmd  -- program to execute; os.execvpe looks it up on PATH.
    args -- list of arguments; ``cmd`` is prepended as argv[0].

    The current os.environ is passed to the new program.  If the exec
    fails, an error message is written to file descriptor 1 and the process
    ends through os._exit(127).
    """
    try:
        try:
            os.execvpe(cmd, [cmd] + args, os.environ)
        except OSError:
            # Fetched via sys.exc_info() so the code parses on old and new
            # interpreters alike.
            e = sys.exc_info()[1]
            # Written straight to fd 1: os._exit() below does not flush
            # stdio buffers, so a buffered print could be lost.
            os.write(1, "Error exec'ing %s: %s\n\n" % (cmd, str(e)))
    finally:
        os._exit(127)
def runCommandParent(r, pid, opts):
    """Collect a child's output until it exits or the deadline passes.

    r    -- file descriptor to read the child's output from.
    pid  -- process id of the child.
    opts -- object whose ``endtime`` attribute is the absolute deadline, on
            the same clock as time.time().

    If the deadline passes before the child has been reaped, the child is
    sent SIGKILL and then waited for.

    Returns (stdout, exitCode).  ``stdout`` is everything read from ``r``.
    ``exitCode`` is the child's exit status, or 128 plus the signal number
    when it was terminated by a signal (137 after the SIGKILL above); the
    raw waitpid() status word is no longer returned, so callers printing
    "exit code" show the real value.  Zero still means success.
    """
    # Non-blocking mode lets the pipe be drained until EAGAIN without
    # stalling past the deadline.
    flags = fcntl.fcntl(r, fcntl.F_GETFL)
    fcntl.fcntl(r, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    stdout = ""
    status = -1  # -1 means "child not reaped yet"
    timeout = opts.endtime - time.time()
    while timeout >= 0:
        rlist, _, _ = select.select([r], [], [], timeout)
        timeout = opts.endtime - time.time()
        if r in rlist:
            chunk = os.read(r, 1024)
            while chunk:
                stdout += chunk
                try:
                    chunk = os.read(r, 1024)
                except OSError:
                    oe = sys.exc_info()[1]
                    if oe.errno != errno.EAGAIN:
                        raise
                    # Nothing more to read right now.
                    chunk = ''
        reaped, status = os.waitpid(pid, os.WNOHANG)
        if reaped:
            break
        status = -1

    if status < 0:
        # The loop only ends without a reaped child once the deadline passed.
        if timeout < 0:
            os.kill(pid, signal.SIGKILL)
        pid, status = os.waitpid(pid, 0)

    # Turn the 16-bit wait status into a shell-style exit code.
    if os.WIFSIGNALED(status):
        exitCode = 128 + os.WTERMSIG(status)
    else:
        exitCode = os.WEXITSTATUS(status)
    return stdout, exitCode
def runCommand(cmd, args, opts):
    """Run ``cmd`` in a forked child and capture what it writes to stdout.

    cmd  -- program to run (passed to runCommandChild).
    args -- list of arguments for the program.
    opts -- options object handed on to runCommandParent.

    The child gets the write end of a pipe as file descriptor 1; the parent
    reads from the other end.  Returns whatever runCommandParent returns for
    the child, i.e. a (stdout, exitCode) pair.
    """
    r, w = os.pipe()
    try:
        pid = os.fork()
        if not pid:
            # Child: make the pipe's write end its stdout, then exec.
            os.close(r)
            os.dup2(w, 1)
            os.close(w)
            runCommandChild(cmd, args)
        else:
            # Parent: keep only the read end.
            os.close(w)
            return runCommandParent(r, pid, opts)
    finally:
        # Close both ends; descriptors already closed above are ignored.
        for fd in (r, w):
            try:
                os.close(fd)
            except OSError:
                pass
def sourceFile(filename, opts):
    """Source a shell script and copy the resulting environment into os.environ.

    filename -- path of the script to source.
    opts     -- options object handed on to runCommand.

    The script is read by /bin/sh, which then prints its environment with
    ``env``; every ``NAME=value`` line of that output is stored in
    os.environ.

    Returns None (after logging a warning) when ``filename`` does not exist,
    otherwise the exit code reported by runCommand; a non-zero code means
    os.environ was left untouched.
    """
    if not os.path.exists(filename):
        log.warning("Trying to source a file (%s) which does not exist" % filename)
        return
    # The path is handed over as a positional parameter instead of being
    # pasted into the command text, so spaces or shell metacharacters in it
    # cannot change the command.  "." is the POSIX spelling of "source"; the
    # shift hides the parameter from the sourced script.
    script = '_src=$1; shift; . "$_src" && env'
    stdout, exitCode = runCommand("/bin/sh", ["-c", script, "sh", filename], opts)
    if exitCode:
        return exitCode
    for line in stdout.splitlines():
        info = line.split("=", 1)
        if len(info) == 2:
            os.environ[info[0]] = info[1]
    return exitCode
def evalCommand(cmd, args, opts):
    """Evaluate a command's output as shell code and import the environment.

    cmd  -- program whose standard output is shell code.
    args -- list of arguments, joined with spaces onto the command line.
    opts -- options object handed on to runCommand.

    /bin/sh evals the program's output and then prints its environment with
    ``env``; every ``NAME=value`` line is stored in os.environ.

    Returns the exit code reported by runCommand; a non-zero code means
    os.environ was left untouched.
    """
    script = 'eval `%s %s` && env' % (cmd, ' '.join(args))
    stdout, exitCode = runCommand('/bin/sh', ['-c', script], opts)
    if not exitCode:
        for line in stdout.splitlines():
            pieces = line.split("=", 1)
            if len(pieces) == 2:
                os.environ[pieces[0]] = pieces[1]
    return exitCode
def main():
    """Run the xrootd-access test and return the process exit status.

    Steps, in order: change to the script's directory, parse options, check
    the X509 proxy, optionally source $OSG_GRID/setup.sh, log the proxy DN
    and primary FQAN, locate and source cmsset_default.sh, read the site name
    from site-local-config.xml, create a CMSSW release area under
    ./xrootd-access, write test_xrootd.py there and run it with cmsRun.

    Every failure path returns through print_summary() with the "critical"
    code, which is currently taken from SAME_WARNING (default 1).  On
    success the "ok" code from SAME_OK (default 0) is returned.
    """
    # Change to test directory
    script_dir = os.path.split(sys.argv[0])[0]
    if script_dir:
        os.chdir(script_dir)

    _, opts = parse_opts()

    # For now, never give failures - just warnings
    NAG_CRITICAL = getExitCode("SAME_WARNING", 1)
    #NAG_WARNING = getExitCode("SAME_WARNING", 1)
    NAG_OK = getExitCode("SAME_OK", 0)

    # Default to successful job
    exitcode = NAG_OK

    # Report when, where and as whom the test runs.
    now = time.strftime("%x %X", time.gmtime())
    currdir = os.path.abspath(os.curdir)
    host = socket.gethostname()
    pilot_user_info, exitCode = runCommand("/usr/bin/id", ["-u"], opts)
    if exitCode:
        return print_summary("Failed to run 'id -u' to get user information", NAG_CRITICAL)
    pilot_user_info = pilot_user_info.strip()
    log.info("Ran at %s on host %s, directory %s, as user %s" % (now, host, currdir, pilot_user_info))

    # The proxy defaults to /tmp/x509up_u<euid> when X509_USER_PROXY is unset.
    pilot_uid = os.geteuid()
    pilot_proxy = os.environ.get("X509_USER_PROXY", "/tmp/x509up_u%d" % pilot_uid)
    if not os.path.isfile(pilot_proxy):
        return print_summary("X509_USER_PROXY=%s is not a file" % pilot_proxy, NAG_CRITICAL)
    log.info("X509_USER_PROXY=%s" % pilot_proxy)

    # Workaround to suppress voms errors on OSG
    os.environ["VOMS_PROXY_INFO_DONT_VERIFY_AC"] = "1"

    # Set up the grid environment when $OSG_GRID provides a setup script.
    if 'OSG_GRID' in os.environ:
        osg_setup = os.path.join(os.environ['OSG_GRID'], 'setup.sh')
        if os.path.isfile(osg_setup):
            exitCode = sourceFile(osg_setup, opts)
            if exitCode:
                return print_summary("Failed to source %s." % osg_setup, NAG_CRITICAL)
        else:
            log.warning("$OSG_GRID is defined (%s), but %s does not exist." % (os.environ['OSG_GRID'], osg_setup))

    # Proxy identity; failures here are only warnings.
    dn, exitCode = runCommand("voms-proxy-info", ["--identity"], opts)
    dn = dn.strip()
    if not exitCode:
        log.info("DN: %s" % dn)
    else:
        log.warning("Unable to determine DN from voms-proxy-info")

    # Only the first line of the FQAN listing is reported.
    fqan, exitCode = runCommand("voms-proxy-info", ["--fqan"], opts)
    if fqan:
        fqan = fqan.splitlines()[0]
    if not exitCode:
        log.info("Primary FQAN: %s" % fqan)
    else:
        log.warning("Unable to determine primary FQAN from voms-proxy-info")

    # Set the CMS environment
    if 'VO_CMS_SW_DIR' in os.environ:
        sw_dir = os.environ['VO_CMS_SW_DIR']
    elif 'OSG_APP' in os.environ:
        sw_dir = os.path.join(os.environ['OSG_APP'], 'cmssoft', 'cms')
    elif 'CVMFS' in os.environ:
        sw_dir = os.path.join(os.environ['CVMFS'], 'cms.cern.ch')
    elif os.path.isdir('/cvmfs/cms.cern.ch'):
        sw_dir = '/cvmfs/cms.cern.ch'
    else:
        return print_summary("None of $VO_CMS_SW_DIR, $OSG_APP, $CVMFS, or /cvmfs/cms.cern.ch available", NAG_CRITICAL)
    log.info("Using software directory %s" % sw_dir)

    sw_setup_script = os.path.join(sw_dir, 'cmsset_default.sh')
    if not os.path.isfile(sw_setup_script):
        return print_summary("The software setup script (%s) is missing" % sw_setup_script, NAG_CRITICAL)
    log.info("CMS software setup script (%s) is present." % sw_setup_script)

    # SCRAM_ARCH is exported before the setup script is sourced.
    scram_arch = "slc5_amd64_gcc462"
    log.info("Using SCRAM_ARCH=%s" % scram_arch)
    os.environ["SCRAM_ARCH"] = scram_arch
    exitCode = sourceFile(sw_setup_script, opts)
    if exitCode:
        return print_summary("Failed to source setup script %s." % sw_setup_script, NAG_CRITICAL)

    if 'CMS_PATH' not in os.environ:
        return print_summary("CMS_PATH not defined after sourcing %s" % sw_setup_script, NAG_CRITICAL)
    cms_path = os.environ['CMS_PATH']
    if not os.path.isdir(cms_path):
        return print_summary("CMS_PATH %s is not a directory." % cms_path, NAG_CRITICAL)

    # Parse the local config file and find site name
    job_config_dir = os.path.join(cms_path, 'SITECONF', 'local', 'JobConfig')
    if not os.path.isdir(job_config_dir):
        return print_summary("JobConfig directory %s does not exist" % job_config_dir, NAG_CRITICAL)
    config_file = os.path.join(job_config_dir, 'site-local-config.xml')
    if not os.path.isfile(config_file):
        return print_summary("Local configuration file %s does not exist." % config_file, NAG_CRITICAL)
    log.info("Local configuration file: %s" % config_file)

    # TODO: Parse the site-local-config instead!
    stdout, exitCode = runCommand('grep', ['site name', config_file], opts)
    if exitCode:
        return print_summary("Cannot read configuration file %s" % config_file, NAG_CRITICAL)
    # The site name is the first double-quoted value on the first matching
    # line that is not a "subsite name" line.
    siteName = None
    for line in stdout.splitlines():
        if 'subsite name' in line:
            continue
        fields = line.split('"')
        if len(fields) >= 2:
            siteName = fields[1].strip()
            break
    if siteName is None:
        return print_summary("Missing site name in configuration file %s" % config_file, NAG_CRITICAL)
    log.info("Site name: %s" % siteName)

    # Setup CMSSW, run command
    cmssw_file = "/store/mc/SAM/GenericTTbar/GEN-SIM-RECO/CMSSW_5_3_1_START53_V5-v1/0013/CE4D66EB-5AAE-E111-96D6-003048D37524.root"
    xrootd_file = "root://cms-xrd-global.cern.ch//store/test/xrootd/%s/%s" % (siteName, cmssw_file)
    log.info("Xrootd file we will test: %s" % xrootd_file)

    try:
        os.mkdir("xrootd-access")
    except OSError:
        oe = sys.exc_info()[1]
        return print_summary("Failure to create test directory 'xrootd-access': %s." % str(oe), NAG_CRITICAL)
    os.chdir("xrootd-access")

    cmssw_version = "CMSSW_5_3_1"
    stdout, exitCode = runCommand("scramv1", ["p", "CMSSW", cmssw_version], opts)
    log.info("scramv1 p CMSSW %s output:" % cmssw_version)
    print(stdout)
    if exitCode:
        return print_summary("Cannot make %s release area (SCRAM_ARCH=%s)" % (cmssw_version, scram_arch), NAG_CRITICAL)
    os.chdir(cmssw_version)

    # Write the cmsRun configuration with the test file's URL filled in.
    fd = open("test_xrootd.py", "w")
    fd.write(cms_file % xrootd_file)
    fd.close()

    exitCode = evalCommand("scramv1", ["runtime", "-sh"], opts)
    if exitCode:
        return print_summary("Failure when setting up scramv1 runtime (CMSSW %s, SCRAM_ARCH=%s)" % (cmssw_version, scram_arch), NAG_CRITICAL)

    stdout, exitCode = runCommand("cmsRun", ["test_xrootd.py"], opts)
    if exitCode:
        log.error("Failed cmsRun. Output:")
        print(stdout)
        return print_summary("Failed cmsRun; exit code %d" % exitCode, NAG_CRITICAL)
    log.info("Successful cmsRun. Output:")
    print(stdout)

    # Return the correct exit code.
    if exitcode == NAG_OK:
        log.info("Success!")
    else:
        log.warning("Execution contains warnings")
    return exitcode
# Script entry point: exit with the status returned by main().
if __name__ == '__main__':
    sys.exit(main())
#! /bin/bash
# Print a log line to stdout: the output of `date`, a colon, then the text.
#   $1 - text to log
add2buffer() {
  local stamp
  stamp=$(date)
  printf '%s: %s\n' "$stamp" "$1"
}
# Print the summary line for this test to stdout as "summary: <text>".
#   $1 - summary text
do_print() {
  printf '%s\n' "summary: $1"
}
# Change to test directory (quoted so a script path containing spaces works)
cd -- "$(dirname -- "$0")"

# parse arguments
#   -v <value>  stored in $verbosity (defaults to "1")
#   -H <value>  stored in $host
#   -t <value>  stored in $timeout
verbosity="1"
while getopts "v:H:t:" flag; do
  case "$flag" in
    v) verbosity=$OPTARG ;;
    H) host=$OPTARG ;;
    t) timeout=$OPTARG ;;
  esac
done

# For now, give warnings by default.
# For T2_US_* sites, this is later upgraded to a critical test.
# Defaults (1 for warning, 0 for OK) are used when SAME_WARNING / SAME_OK are
# not set: an empty NAG_CRITICAL would turn `exit $NAG_CRITICAL` into a bare
# `exit`, which returns the status of the previous command (0 after a
# successful echo) and so would report a failed test as a success.
NAG_CRITICAL=${SAME_WARNING:-1}
NAG_WARNING=${SAME_WARNING:-1}
NAG_OK=${SAME_OK:-0}

# Default to successful job
exitcode=$NAG_OK
# Print environment information
now="$(date -u +'%F %T') UTC"
currdir=$PWD
# NOTE: this overwrites whatever the -H option stored in $host.
host=$(uname -n)
pilotid=$(/usr/bin/id)
pilotidu=$(/usr/bin/id -u)
# Numeric group ids, sorted ascending, joined by spaces
pilotidg=$(id -G | tr ' ' '\n' | sort -n | tr '\n' ' ')
add2buffer "Ran at $now on host $host, directory $currdir, as user:"
add2buffer "$pilotid"

# Check that $X509_USER_PROXY points to an existing file
if [ -z "$X509_USER_PROXY" ]; then
  do_print "Error: X509_USER_PROXY is not defined"
  exit $NAG_CRITICAL
fi
if [ -f "$X509_USER_PROXY" ]; then
  add2buffer "X509_USER_PROXY=$X509_USER_PROXY"
else
  do_print "Error: X509_USER_PROXY points to a non existing location"
  exit $NAG_CRITICAL
fi

# workaround to suppress voms errors on OSG
export VOMS_PROXY_INFO_DONT_VERIFY_AC="1"

# Setup the grid environment:
# $OSG_GRID is quoted so a path containing whitespace or glob characters is
# tested and sourced as a single word.
if [ -n "$OSG_GRID" ]; then
  if [ -f "$OSG_GRID/setup.sh" ]; then
    source "$OSG_GRID/setup.sh"
  fi
fi

# Report the proxy identity and the first FQAN listed by voms-proxy-info
dn=$(voms-proxy-info --identity)
fqan=$(voms-proxy-info --fqan | head -1)
add2buffer "DN: $dn"
add2buffer "Primary FQAN: $fqan"
# Set the CMS environment
# SW_DIR comes from the first available of: $VO_CMS_SW_DIR, $OSG_APP,
# $CVMFS, then the fixed /cvmfs/cms.cern.ch path; with none of them the
# script exits with $NAG_CRITICAL.
if [[ -n "${VO_CMS_SW_DIR}" ]]; then
  SW_DIR="${VO_CMS_SW_DIR}"
  add2buffer "VO_CMS_SW_DIR=${VO_CMS_SW_DIR}"
elif [[ -n "${OSG_APP}" ]]; then
  SW_DIR="${OSG_APP}/cmssoft/cms"
  add2buffer "OSG_APP/cmssoft/cms=${OSG_APP}/cmssoft/cms"
elif [[ -n "${CVMFS}" ]]; then
  SW_DIR="${CVMFS}/cms.cern.ch"
  add2buffer "CVMFS (via env)=${CVMFS}/cms.cern.ch"
elif [[ -e /cvmfs/cms.cern.ch ]]; then
  SW_DIR="/cvmfs/cms.cern.ch"
  add2buffer "CVMFS=/cvmfs/cms.cern.ch"
else
  do_print "Error: None of OSG_APP, VO_CMS_SW_DIR, or CVMFS are present."
  exit $NAG_CRITICAL
fi
if [ ! -f $SW_DIR/cmsset_default.sh ]; then
do_print "Error: cmssw setup file $SW_DIR/cmsset_default.sh not existing"
exit $NAG_CRITICAL