Commit 52014b53 authored by Andrea Sciaba's avatar Andrea Sciaba
Browse files

*** empty log message ***

parent 501f01ba
# Identification of this sensor and of the job-submission pseudo-test.
JOB_SENSOR_NAME='testjob'
JOB_TEST_NAME='CE-cms-prod'

# gLite WMS client command lines. They are expanded unquoted by the callers,
# so each value is a command name optionally followed by its fixed options.
JOB_SUBMIT_CMD="glite-wms-job-submit -c ${GLITE_WMS_CLIENT_CONFIG} -a"
JOB_STATUS_CMD='glite-wms-job-status'
JOB_OUTPUT_CMD='glite-wms-job-output'
JOB_CANCEL_CMD='glite-wms-job-cancel --noint'
JOB_LOGGING_INFO_CMD='glite-wms-job-logging-info -v 3'
# Identification of this sensor and of the job-submission pseudo-test.
JOB_SENSOR_NAME='testjob'
JOB_TEST_NAME='CE-cms-prod'

# gLite WMS client command lines. They are expanded unquoted by the callers,
# so each value is a command name optionally followed by its fixed options.
JOB_SUBMIT_CMD="glite-wms-job-submit -c ${GLITE_WMS_CLIENT_CONFIG} -a"
JOB_STATUS_CMD='glite-wms-job-status'
JOB_OUTPUT_CMD='glite-wms-job-output'
JOB_CANCEL_CMD='glite-wms-job-cancel --noint'
JOB_LOGGING_INFO_CMD='glite-wms-job-logging-info -v 3'
# Identification of this sensor and of the job-submission pseudo-test.
JOB_SENSOR_NAME='testjob'
JOB_TEST_NAME='CE-sft-job'

# gLite WMS client command lines. They are expanded unquoted by the callers,
# so each value is a command name optionally followed by its fixed options.
JOB_SUBMIT_CMD="glite-wms-job-submit -c ${GLITE_WMS_CLIENT_CONFIG} -a"
JOB_STATUS_CMD='glite-wms-job-status'
JOB_OUTPUT_CMD='glite-wms-job-output'
JOB_CANCEL_CMD='glite-wms-job-cancel --noint'
JOB_LOGGING_INFO_CMD='glite-wms-job-logging-info -v 3'
# Identification of this sensor and of the job-submission pseudo-test.
JOB_SENSOR_NAME='testjob'
JOB_TEST_NAME='CE-sft-job'

# gLite WMS client command lines. They are expanded unquoted by the callers,
# so each value is a command name optionally followed by its fixed options.
JOB_SUBMIT_CMD="glite-wms-job-submit -c ${GLITE_WMS_CLIENT_CONFIG} -a"
JOB_STATUS_CMD='glite-wms-job-status'
JOB_OUTPUT_CMD='glite-wms-job-output'
JOB_CANCEL_CMD='glite-wms-job-cancel --noint'
JOB_LOGGING_INFO_CMD='glite-wms-job-logging-info -v 3'
lxdpm101.cern.ch
lxn1183.cern.ch
srm-cms.cern.ch
castorsrm.cern.ch
castorgrid.cern.ch
# JDL template for the SAM test job. The submitting script replaces the
# node-name placeholder in the Requirements line with the CE host under test.
# The payload script is run through /bin/sh with tracing (-x) enabled.
Executable = "/bin/sh";
Arguments = "-x testjob.sh";
# Standard output and standard error are merged into one file.
StdOutput = "testjob.out";
StdError = "testjob.out";
# Files shipped with the job, and files brought back when it finishes.
InputSandbox = {"testjob.sh","testjob.tgz","same.conf"};
OutputSandbox = {"testjob.out","testjob-results.tgz"};
# Match only the target CE host, and only queues whose maximum wall-clock
# limit is at least 25 (presumably minutes, per the GLUE schema -- confirm).
Requirements = other.GlueCEPolicyMaxWallClockTime >= 25 && other.GlueCEInfoHostName == "<nodeName>";
# Deep resubmission is disabled (0); ShallowRetryCount is set to -1
# (presumably "shallow resubmission disabled" -- confirm against the JDL spec).
RetryCount = 0;
ShallowRetryCount = -1;
# Rank is the negated wall-clock limit, so among matching queues the one with
# the smallest limit gets the highest rank.
Rank = - other.GlueCEPolicyMaxWallClockTime;
#!/bin/bash
# Dummy test: prints a fixed success message and exits with the status held
# in SAME_OK (left unquoted so an unset value degrades to a plain "exit").
printf '%s\n' 'Test OK'
exit $SAME_OK
testName: CE-cms-dummy
testTitle: Dummy test
testAbbr: dummy
testHelp: none
#!/bin/bash
#****** CE/CE-sft-job
# NAME
# CE-sft-job - This is a pseudo-test executed by SAM on the submission UI to submit
# the actual tests to CEs and publish the results of the test job submission and
# output retrieval. Succeeds only if the job finished successfully and the output
# was retrieved.
#
# AUTHOR
#
# SAM Team same-devel[at]cern.ch
#
# LAST UPDATED
#
# 2008-04-28
#
# LANGUAGE
#
# bash
#
# SOURCE
# Name of the HTML report fragment that this script builds up and prints.
log="CE-sft-job.log"

# Load the sensor configuration, then the optional CE-specific overrides
# (sourced only when SAME_CE_CONFIG is set and names an existing file).
. $SAME_SENSOR_HOME/config.sh
if test $SAME_CE_CONFIG && test -e $SAME_CE_CONFIG ; then
    . $SAME_CE_CONFIG
fi

# Per-node parameters are read from files of the same name in the current
# working directory.
envName=$(cat envName)
centralSE=$(cat centralSE)
LFC_HOST=$(cat LFC_HOST)
inMaintenance=$(cat inMaintenance)
# Test timeout. We have to die gracefully while "reporting" as much as we've collected.
#
# Handler installed on the ALRM trap. It appends a failure summary to the
# report, prints the report and terminates the script.
#
# Globals:
#   log          (read)  report file that is appended to and then printed
#   JOBSTATUS    (read)  temporary job-status file; removed when set
#   SAME_WARNING (read)  exit status used for a timeout
# Outputs:
#   The content of $log on stdout.
# Returns:
#   Never returns; exits with $SAME_WARNING.
test_timeout_trap() {
    if [ ! -e testjob.jid ] ; then
        # Signaled at submission time. During the submit command stdout and
        # stderr point at the log and the originals are parked on fds 3 and 4.
        # Restore them only when both are really open: the signal may also
        # arrive before they are saved, and redirecting from a closed
        # descriptor would only produce a "Bad file descriptor" error.
        if { true >&3 ; } 2>/dev/null && { true >&4 ; } 2>/dev/null ; then
            exec 1>&- 1>&3 2>&4 3>&- 4>&-
        fi
        echo "Job submission failed!" >> "$log"
        echo "summary: RBError" >> "$log"
    else
        # Signaled at getting job status.
        echo "Getting job status failed!" >> "$log"
        echo "summary: LBError" >> "$log"
        # Do not leave the temporary status file behind on a timeout.
        if [ -n "${JOBSTATUS:-}" ] ; then
            rm -f -- "$JOBSTATUS"
        fi
    fi
    echo "</pre>" >> "$log"
    cat "$log"
    # Unquoted on purpose: an unset SAME_WARNING degrades to a plain "exit".
    exit $SAME_WARNING
}
# Timeout handling: the handler is bound to SIGALRM; tracing is switched off
# first so that "set -x" output does not pollute the report.
trap 'set +x; test_timeout_trap' ALRM

if [ "x$1" == "x--publish" ] ; then
    # ------------------------------------------------------------------
    # Publish mode: look at the previously submitted job and print the
    # report for it. Status is SAME_OK unless a problem is detected.
    # ------------------------------------------------------------------
    if [ ! -e testjob.jid ] ; then
        # No job id file: nothing to report.
        exit 0
    fi

    RETCODE=$SAME_OK
    # Seconds elapsed since testjob.jid was last modified.
    age=$(( $(date +%s) - $(stat -c %Y testjob.jid) ))

    JOBSTATUS=$(mktemp jobstatus-$$-XXXX)
    # The *_CMD variables are expanded unquoted on purpose: they may hold a
    # command name followed by options.
    $JOB_STATUS_CMD -i testjob.jid > "$JOBSTATUS"
    jobStatus=$(grep "^Current Status:" "$JOBSTATUS" \
        | sed -e "s/Current Status:\s*//" -e "s/\s*$//")

    # Job has not progressed past Submitted/Waiting/Ready after 21600 s (6 h).
    if [[ "$jobStatus" == "Submitted" || "$jobStatus" == "Waiting" || "$jobStatus" == "Ready" ]] \
        && [ "$age" -ge 21600 ] ; then
        cat "$log"
        echo "<h2>Job status:</h2>"
        echo "<pre>"
        cat "$JOBSTATUS"
        echo "</pre>"
        echo "<h2>Job stuck with logging info:</h2>"
        echo "<pre>"
        $JOB_LOGGING_INFO_CMD -i testjob.jid > testjob.log
        cat testjob.log
        echo "</pre>"
        echo "summary: STUCK"
        RETCODE=$SAME_NOTICE
        rm -f testjob.jid "$JOBSTATUS"
        exit $RETCODE
    fi

    if [ "$jobStatus" == "Aborted" ] ; then
        cat "$log"
        echo "<h2>Job status:</h2>"
        echo "<pre>"
        cat "$JOBSTATUS"
        echo "</pre>"
        echo "<h2>Job failed with logging info:</h2>"
        echo "<pre>"
        $JOB_LOGGING_INFO_CMD -i testjob.jid > testjob.log
        cat testjob.log
        echo "</pre>"
        # Classify the abort from the reasons found in the logging info.
        if grep -Eq "Reason.*request expired" testjob.log 2>/dev/null ; then
            if grep -Eq "Reason.*BrokerHelper: no compatible resources" testjob.log 2>/dev/null ; then
                echo "summary: LISTMATCHFAILED"
                RETCODE=$SAME_ERROR
            else
                echo "summary: Check WMS"
                RETCODE=$SAME_NOTICE
            fi
        elif grep -q "the user proxy expired (job is still running)" testjob.log 2>/dev/null ; then
            echo "summary: PROXYEXPIRED"
            RETCODE=$SAME_WARNING
        else
            echo "summary: FAILED"
            RETCODE=$SAME_ERROR
        fi
        rm -f testjob.jid "$JOBSTATUS"
        exit $RETCODE
    fi

    if [ -e testjob.out ] ; then
        # The job output is available: publish it.
        cat "$log"
        echo "<h2>Job output:</h2>"
        echo "<pre>"
        cat testjob.out
        echo "</pre>"
        if grep -q "Problem unpacking SAM Framework" testjob.out ; then
            echo "summary: Problem unpacking SAM Framework on WN"
            RETCODE=$SAME_ERROR
        fi
        # Also remove the temporary status file, which used to be left behind
        # on this exit path.
        rm -f testjob.out testjob.jid "$JOBSTATUS"
        exit $RETCODE
    fi

    # Job still in progress and no output yet: report nothing this time.
    rm -f "$JOBSTATUS"
    exit 0
else
    # ------------------------------------------------------------------
    # Submit mode: $1 = site name, $2 = node name of the CE under test.
    # ------------------------------------------------------------------
    if [ -e testjob.jid ] ; then
        # job exists, do nothing
        exit 0
    fi

    siteName=$1
    nodeName=$2
    rm -f "$log" testjob.jid

    {
        echo "<pre>"
        date --utc
        echo "Submitting from host: $(hostname -f)"
        echo "DN: $(voms-proxy-info -identity)"
        echo "</pre>"
        echo "<h2>Generating JDL file:</h2>"
    } >> "$log"

    # Instantiate the payload script and the JDL from their templates.
    sed -e "s/<nodeName>/$nodeName/g" -e "s/<centralSE>/$centralSE/g" \
        -e "s/<LFC_HOST>/$LFC_HOST/g" -e "s/<envName>/$envName/g" \
        "$SAME_SENSOR_HOME/testjob.sh" > testjob.sh
    cp -f "$SAME_SENSOR_HOME/testjob.jdl" testjob.jdl
    sed -i -e "s/<nodeName>/$nodeName/g" testjob.jdl

    {
        echo "<pre>"
        cat testjob.jdl
        echo "</pre>"
        echo "<b>content of <i>testjob.sh</i></b>"
        echo "<pre>"
        cat testjob.sh
        echo "</pre>"
        echo "<h2>Submitting a job</h2>"
        echo "<pre>"
    } >> "$log"

    # Park stdout/stderr on fds 3/4 and send the traced submit command to the
    # log; test_timeout_trap undoes this if the timeout fires meanwhile.
    exec 3>&1 4>&2 1>>"$log" 2>&1
    set -x
    $JOB_SUBMIT_CMD -o testjob.jid testjob.jdl
    set +x
    exec 1>&- 1>&3 2>&4 3>&- 4>&-
    echo "</pre>" >> "$log"

    if [ ! -e testjob.jid ] ; then
        echo "Job submission failed! Check the Resource Broker" >> "$log"
        echo "summary: RBError" >> "$log"
        cat "$log"
        exit $SAME_NOTICE
    fi
    exit 0
fi
#****
testName: CE-cms-prod
testTitle: CMS prod job submission
testAbbr: jsprod
testHelp: https://twiki.cern.ch/twiki/bin/view/CMS/SAMProd
#!/bin/bash
#****** CE/CE-sft-job
# NAME
# CE-sft-job - This is a pseudo-test executed by SAM on the submission UI to submit
# the actual tests to CEs and publish the results of the test job submission and
# output retrieval. Succeeds only if the job finished successfully and the output
# was retrieved.
#
# AUTHOR
#
# SAM Team same-devel[at]cern.ch
#
# LAST UPDATED
#
# 2008-04-28
#
# LANGUAGE
#
# bash
#
# SOURCE
# Name of the HTML report fragment that this script builds up and prints.
log="CE-sft-job.log"

# Load the sensor configuration, then the optional CE-specific overrides
# (sourced only when SAME_CE_CONFIG is set and names an existing file).
. $SAME_SENSOR_HOME/config.sh
if test $SAME_CE_CONFIG && test -e $SAME_CE_CONFIG ; then
    . $SAME_CE_CONFIG
fi

# Per-node parameters are read from files of the same name in the current
# working directory.
envName=$(cat envName)
centralSE=$(cat centralSE)
LFC_HOST=$(cat LFC_HOST)
inMaintenance=$(cat inMaintenance)
# Test timeout. We have to die gracefully while "reporting" as much as we've collected.
#
# Handler installed on the ALRM trap. It appends a failure summary to the
# report, prints the report and terminates the script.
#
# Globals:
#   log          (read)  report file that is appended to and then printed
#   JOBSTATUS    (read)  temporary job-status file; removed when set
#   SAME_WARNING (read)  exit status used for a timeout
# Outputs:
#   The content of $log on stdout.
# Returns:
#   Never returns; exits with $SAME_WARNING.
test_timeout_trap() {
    if [ ! -e testjob.jid ] ; then
        # Signaled at submission time. During the submit command stdout and
        # stderr point at the log and the originals are parked on fds 3 and 4.
        # Restore them only when both are really open: the signal may also
        # arrive before they are saved, and redirecting from a closed
        # descriptor would only produce a "Bad file descriptor" error.
        if { true >&3 ; } 2>/dev/null && { true >&4 ; } 2>/dev/null ; then
            exec 1>&- 1>&3 2>&4 3>&- 4>&-
        fi
        echo "Job submission failed!" >> "$log"
        echo "summary: RBError" >> "$log"
    else
        # Signaled at getting job status.
        echo "Getting job status failed!" >> "$log"
        echo "summary: LBError" >> "$log"
        # Do not leave the temporary status file behind on a timeout.
        if [ -n "${JOBSTATUS:-}" ] ; then
            rm -f -- "$JOBSTATUS"
        fi
    fi
    echo "</pre>" >> "$log"
    cat "$log"
    # Unquoted on purpose: an unset SAME_WARNING degrades to a plain "exit".
    exit $SAME_WARNING
}
# Timeout handling: the handler is bound to SIGALRM; tracing is switched off
# first so that "set -x" output does not pollute the report.
trap 'set +x; test_timeout_trap' ALRM

if [ "x$1" == "x--publish" ] ; then
    # ------------------------------------------------------------------
    # Publish mode: look at the previously submitted job and print the
    # report for it. Status is SAME_OK unless a problem is detected.
    # ------------------------------------------------------------------
    if [ ! -e testjob.jid ] ; then
        # No job id file: nothing to report.
        exit 0
    fi

    RETCODE=$SAME_OK
    # Seconds elapsed since testjob.jid was last modified.
    age=$(( $(date +%s) - $(stat -c %Y testjob.jid) ))

    JOBSTATUS=$(mktemp jobstatus-$$-XXXX)
    # The *_CMD variables are expanded unquoted on purpose: they may hold a
    # command name followed by options.
    $JOB_STATUS_CMD -i testjob.jid > "$JOBSTATUS"
    jobStatus=$(grep "^Current Status:" "$JOBSTATUS" \
        | sed -e "s/Current Status:\s*//" -e "s/\s*$//")

    # Job has not progressed past Submitted/Waiting/Ready after 21600 s (6 h).
    if [[ "$jobStatus" == "Submitted" || "$jobStatus" == "Waiting" || "$jobStatus" == "Ready" ]] \
        && [ "$age" -ge 21600 ] ; then
        cat "$log"
        echo "<h2>Job status:</h2>"
        echo "<pre>"
        cat "$JOBSTATUS"
        echo "</pre>"
        echo "<h2>Job stuck with logging info:</h2>"
        echo "<pre>"
        $JOB_LOGGING_INFO_CMD -i testjob.jid > testjob.log
        cat testjob.log
        echo "</pre>"
        echo "summary: STUCK"
        RETCODE=$SAME_NOTICE
        rm -f testjob.jid "$JOBSTATUS"
        exit $RETCODE
    fi

    if [ "$jobStatus" == "Aborted" ] ; then
        cat "$log"
        echo "<h2>Job status:</h2>"
        echo "<pre>"
        cat "$JOBSTATUS"
        echo "</pre>"
        echo "<h2>Job failed with logging info:</h2>"
        echo "<pre>"
        $JOB_LOGGING_INFO_CMD -i testjob.jid > testjob.log
        cat testjob.log
        echo "</pre>"
        # Classify the abort from the reasons found in the logging info.
        if grep -Eq "Reason.*request expired" testjob.log 2>/dev/null ; then
            if grep -Eq "Reason.*BrokerHelper: no compatible resources" testjob.log 2>/dev/null ; then
                echo "summary: LISTMATCHFAILED"
                RETCODE=$SAME_ERROR
            else
                echo "summary: Check WMS"
                RETCODE=$SAME_NOTICE
            fi
        else
            echo "summary: FAILED"
            RETCODE=$SAME_ERROR
        fi
        rm -f testjob.jid "$JOBSTATUS"
        exit $RETCODE
    fi

    if [ -e testjob.out ] ; then
        # The job output is available: publish it.
        cat "$log"
        echo "<h2>Job output:</h2>"
        echo "<pre>"
        cat testjob.out
        echo "</pre>"
        if grep -q "Problem unpacking SAM Framework" testjob.out ; then
            echo "summary: Problem unpacking SAM Framework on WN"
            RETCODE=$SAME_ERROR
        fi
        # Also remove the temporary status file, which used to be left behind
        # on this exit path.
        rm -f testjob.jid testjob.out "$JOBSTATUS"
        exit $RETCODE
    fi

    # Job still in progress and no output yet: report nothing this time.
    rm -f "$JOBSTATUS"
    exit 0
else
    # ------------------------------------------------------------------
    # Submit mode: $1 = site name, $2 = node name of the CE under test.
    # ------------------------------------------------------------------
    if [ -e testjob.jid ] ; then
        # job exists, do nothing
        exit 0
    fi

    siteName=$1
    nodeName=$2
    rm -f "$log" testjob.jid

    {
        echo "<pre>"
        date --utc
        echo "Submitting from host: $(hostname -f)"
        echo "DN: $(voms-proxy-info -identity)"
        echo "</pre>"
        echo "<h2>Generating JDL file:</h2>"
    } >> "$log"

    # Instantiate the payload script and the JDL from their templates.
    sed -e "s/<nodeName>/$nodeName/g" -e "s/<centralSE>/$centralSE/g" \
        -e "s/<LFC_HOST>/$LFC_HOST/g" -e "s/<envName>/$envName/g" \
        "$SAME_SENSOR_HOME/testjob.sh" > testjob.sh
    cp -f "$SAME_SENSOR_HOME/testjob.jdl" testjob.jdl
    sed -i -e "s/<nodeName>/$nodeName/g" testjob.jdl

    {
        echo "<pre>"
        cat testjob.jdl
        echo "</pre>"
        echo "<b>content of <i>testjob.sh</i></b>"
        echo "<pre>"
        cat testjob.sh
        echo "</pre>"
        echo "<h2>Submitting a job</h2>"
        echo "<pre>"
    } >> "$log"

    # Park stdout/stderr on fds 3/4 and send the traced submit command to the
    # log; test_timeout_trap undoes this if the timeout fires meanwhile.
    exec 3>&1 4>&2 1>>"$log" 2>&1
    set -x
    $JOB_SUBMIT_CMD -o testjob.jid testjob.jdl
    set +x
    exec 1>&- 1>&3 2>&4 3>&- 4>&-
    echo "</pre>" >> "$log"

    if [ ! -e testjob.jid ] ; then
        echo "Job submission failed! Check the Resource Broker" >> "$log"
        echo "summary: RBError" >> "$log"
        cat "$log"
        exit $SAME_NOTICE
    fi
    exit 0
fi
#****
# Default configuration for SAME
[DEFAULT]
# Settings for locations
workdir=%(home)s/.same
logdir=%(same_home)s/var/log
resdir=%(same_home)s/var/results
secresdir=%(same_home)s/var/results-secure
webdir=%(same_home)s/web
cachedir=%(same_home)s/var/cache
master_vo=cms
# Logging levels:
# CRITICAL, ERROR, WARNING, INFO, DEBUG, NOTSET
# Logging level for the log file
loglevel=INFO
# Logging level for console messages
verbosity=CRITICAL
[sensors]
common_attrs="sitename nodename inmaintenance"
common_filter="type=Production ismonitored=y voname=%(master_vo)s"
CE_filter="serviceabbr=CE"
CREAMCE_filter="serviceabbr=CREAMCE"
ARCCE_filter="serviceabbr=ARCCE"
gCE_filter="serviceabbr=gCE"
FTS_filter="serviceabbr=FTS"
FTS_attrs="sitename nodename inmaintenance tier"
SE_filter="serviceabbr=SE"
SRM_filter="serviceabbr=SRM"
SRMv1_filter="serviceabbr=SRMv1"
SRMv2_filter="serviceabbr=SRMv2"
LFC_filter="serviceabbr=LFC voname=%(master_vo)s"
LFC_C_filter="serviceabbr=LFC_C voname=%(master_vo)s"
LFC_L_filter="serviceabbr=LFC_L voname=%(master_vo)s"
host-cert_attrs="nodename serviceabbr"
host-cert_filter="serviceabbr=FTS,LFC,LFC_C,LFC_L,VOMS,CE,CREAMCE,SRM,SRMv1,SRMv2,gRB,MyProxy,RB,SE,RGMA"
VOBOX_filter="serviceabbr=VOBOX voname=%(master_vo)s"
RB_filter="serviceabbr=RB"
gRB_filter="serviceabbr=gRB"
[statuscode]
ok=10
info=20
notice=30
warning=40
error=50
critical=60
maintenance=100
[submission]
vo=%(master_vo)s
test_timeout=1800
[scheduler]
max_processes=30
default_timeout=5400
shell=/bin/sh
[webservices]
publisher_wsdl=%(same_home)s/etc/publisher.wsdl
query_wsdl=%(same_home)s/etc/query.wsdl
[msg]
publish_cmd=%(same_home)s/bin/msg-publish
# Set publishing_mode to msg to enable MSG publishing
publishing_mode=samws
#!/bin/bash
#####################################################################
#
# Script to install SAM for CMS
#
# Usage: ./sam_install <sam_dir>
#
# Author: Andrea Sciaba' <Andrea.Sciaba@cern.ch>
#
#####################################################################
#######################################
# Copy a directory tree into another one, skipping every path that contains
# "CVS". Existing files in the destination are overwritten; the destination
# is created when missing.
# Arguments:
#   $1 - source directory
#   $2 - destination directory (a relative path is taken relative to the
#        caller's current directory)
# Returns:
#   0 on success, non-zero if the source is missing or any mkdir/cp failed.
#######################################
cms2sam () {
    local orig=$1
    local dest=$2

    if [ ! -d "$orig" ] ; then
        echo "cms2sam: source directory '$orig' not found" >&2
        return 1
    fi

    # Anchor a relative destination before changing directory, otherwise it
    # would be resolved inside the source tree.
    case $dest in
        /*) ;;
        *) dest=$PWD/$dest ;;
    esac

    # Work in a subshell so that the caller's working directory is untouched
    # whatever happens. The subshell's exit status is the function's status.
    (
        cd "$orig" || exit 1
        status=0
        # NUL-delimited so that names with spaces or newlines survive.
        # find lists a directory before its content, so the target directory
        # always exists by the time its files are copied.
        while IFS= read -r -d '' entry ; do
            if [ -d "$entry" ] ; then
                mkdir -p "$dest/$entry" || status=1
            else
                cp "$entry" "$dest/$(dirname "$entry")" || status=1
            fi
        done < <(find . -not -regex '.*\(CVS\).*' -print0)
        exit "$status"
    )
}
#######################################
# Install the CMS site tests into a SAM tree using cms2sam.
# The FroNtier, MonteCarlo and testjob modules are merged into the "testjob"
# sensor; the CE and SRMv2 sensors are copied to sensors of the same name.
# Arguments:
#   $1 - root of the CMS checkout (contains SiteTests/)
#   $2 - root of the SAM installation (contains client/sensors/)
# Returns:
#   0 if every copy succeeded, 1 if at least one cms2sam call failed.
#   All copies are attempted even after a failure.
#######################################
cmstests2sam () {
    local cmsdir=$1
    local samdir=$2
    local rc=0
    local module sensor

    for module in FroNtier MonteCarlo testjob ; do
        cms2sam "$cmsdir/SiteTests/$module" "$samdir/client/sensors/testjob" || rc=1
    done

    for sensor in CE SRMv2 ; do
        cms2sam "$cmsdir/SiteTests/$sensor" "$samdir/client/sensors/$sensor" || rc=1
    done

    return $rc
}
# Checkout SAM code
# Resolve the installation directory given as the only argument.
cwd=$(pwd)
SAME_DIR=$1
if [ -z "$SAME_DIR" ] ; then
    echo "Usage: sam_install.sh <sam_dir>" >&2
    exit 1
fi
# Make the path absolute: later steps change directory before using it.
case $SAME_DIR in
    /*) ;;
    *) SAME_DIR=$cwd/$SAME_DIR ;;
esac
# The cron scripts live next to the SAM directory. This is derived from the
# absolute path; deriving it from a relative argument would make cms2sam
# create it inside whatever directory it has changed into.
CRON_DIR=$SAME_DIR/../same-cron
# Optionally check out a fresh copy of the SAM framework (CVS tag PROD) into
# $SAME_DIR. An existing $SAME_DIR is never overwritten.
read -r -p "Do you want to reinstall the SAM files (y/n)? " answer
if [ "$answer" == 'y' ] ; then
    if [ -d "$SAME_DIR" ] ; then
        echo "++++ Error: $SAME_DIR already exists. Remove it and re-run the script."
        exit 1
    fi
    echo "++++ Installing the SAM files..."
    export CVSROOT=:pserver:anonymous@glite.cvs.cern.ch:/cvs/glite
    # Check out into a scratch directory; without a usable one cvs would
    # write into the current directory, so treat that as a checkout failure.
    if ! tmpdir=$(mktemp -d) || ! cd "$tmpdir" ; then
        echo "++++ SAM CVS checkout failed"
        exit 1
    fi
    if ! cvs co -r PROD same > /dev/null ; then
        echo "++++ SAM CVS checkout failed"
        cd "$cwd"
        # rm -rf rather than rmdir: a failed checkout may leave files behind.
        rm -rf -- "${tmpdir:?}"
        exit 1
    fi
    cd "$cwd"
    if ! mv -f "$tmpdir/same" "$SAME_DIR" ; then
        echo "++++ Installation of SAM files failed"
        # The checkout is still inside the scratch directory here.
        rm -rf -- "${tmpdir:?}"
        exit 1
    fi
    rmdir "$tmpdir"
    echo "++++ SAM files installed"
fi
# Check out the CMS files
# The CMSSAM module is checked out into a scratch directory; CMS_DIR points
# at it and tmpdir is kept for the later install steps.
export CVSROOT=:pserver:anonymous@cmscvs.cern.ch:/cvs_server/repositories/CMSSW
cvs login
# Without a usable scratch directory cvs would write into the current
# directory, so treat that as a checkout failure.
if ! tmpdir=$(mktemp -d) || ! cd "$tmpdir" ; then
    echo "++++ CMS CVS checkout failed"
    exit 1
fi
if ! cvs co CMSSAM > /dev/null ; then
    echo "++++ CMS CVS checkout failed"
    cd "$cwd"
    # rm -rf rather than rmdir: a failed checkout may leave files behind.
    rm -rf -- "${tmpdir:?}"
    exit 1
fi
CMS_DIR=$tmpdir/CMSSAM
cd "$cwd"
# Install cron job scripts
# Only an answer of exactly "y" triggers the copy of the cron scripts from
# the CMS checkout into $CRON_DIR.
read -p "Do you want to (re)install the SAM cron scripts (y/n)? " answer
case "$answer" in
    y)
        echo "++++ Installing SAM cron scripts..."
        cms2sam $CMS_DIR/scripts/same-cron $CRON_DIR
        echo "++++ SAM cron scripts installed. Edit them as required"
        ;;
esac
# Install files from the CMS CVS
# Only an answer of exactly "y" triggers the installation. If it fails, the
# CMS checkout directory is removed and the script stops with status 1.
read -p "Do you want to (re)install the CMS SAM files (y/n)? " answer
case "$answer" in
    y)
        echo "++++ Installing CMS SAM tests..."
        if cmstests2sam $CMS_DIR $SAME_DIR ; then
            echo "++++ CMS SAM tests installed"
        else
            echo "++++ Error: CMS SAM tests could not be installed"
            rm -rf $tmpdir
            exit 1
        fi
        ;;
esac
# Install SAM configuration file
# workdir is the last path component of the SAM directory; it is handed to
# makesitelist.pl together with the template and the output file.
workdir=$(basename $SAME_DIR)
read -p "Do you want to (re)install the SAM configuration file (y/n)? " answer
case "$answer" in
    y)
        echo "++++ Installing SAM configuration file..."
        cms2sam $CMS_DIR/config $SAME_DIR/client/etc
        if $CRON_DIR/makesitelist.pl $SAME_DIR/client/etc/same.conf.CMS $SAME_DIR/client/etc/same.conf $workdir ; then
            echo "++++ SAM configuration file installed"
        else
            # A failure here is only a warning; the script carries on.
            echo "++++ Warning: SAM configuration file could not be written"
        fi
        ;;
esac
read -p "Do you want to reset the SAM workdir ~/.$workdir (y/n)?" answer
if [ "$answer" == 'y' ] ; then
rm -rf $SAME_DIR/../.$workdir
echo "++++ Removed SAM workdir"