Commit 3eb359e4 authored by Andrea Sciaba
Browse files

Several changes

parent 76cbeab1
#!/bin/bash
# Thin ".sing" wrapper: derives the name of the real test from this script's
# own file name (with the ".sing" suffix removed) and hands it to
# CE-cms-singularity-wrapper, which is sourced into this shell.
# Required environment: SAME_SENSOR_HOME (directory that contains tests/).

# File name of this script without its directory part ...
TEST_SCRIPT=${0##*/}
# ... and without the trailing ".sing" (suffix only, not a ".sing" in the middle).
TEST_SCRIPT=${TEST_SCRIPT%.sing}
echo "Will run $TEST_SCRIPT with Singularity if available"
# Quoted so that an install path containing whitespace is passed as one word.
source "${SAME_SENSOR_HOME}/tests/CE-cms-singularity-wrapper" "${TEST_SCRIPT}"
......@@ -823,10 +823,10 @@ def probe_version(args, io):
elif "X509_USER_PROXY" in os.environ:
if ( not os.path.isfile( os.environ["X509_USER_PROXY"] ) ):
io.write("\nbad value of X509_USER_PROXY environmental variable\n")
io.set_status(nap.UNKNOWN, "plugin certificate error")
io.set_status(nap.UNKNOWN, "plugin certificate file error")
elif ( not os.path.isfile( "/tmp/x509up_u" + str(os.getuid()) ) ):
io.write("\nNo x509 proxy certificate found\n")
io.set_status(nap.UNKNOWN, "plugin certificate error")
io.set_status(nap.UNKNOWN, "plugin no certificate")
if args.ipv4 and ( not args.ipv6 ):
......@@ -841,7 +841,7 @@ def probe_version(args, io):
try:
rc, outerr = nap.core.sub_process(cmd, timeout=60)
except subprocess.TimeoutExpired:
io.set_status(nap.CRITICAL, "querying version timed out")
io.set_status(nap.CRITICAL, "version query timed out")
return
io.write(outerr)
#
......@@ -891,32 +891,49 @@ def probe_read(args, io):
# filename, flags=(OpenFlags.READ | OpenFlags.REFRESH),
# timeout=180)
if ( not status.ok ):
io.set_status(nap.CRITICAL, ("failed to open test file \"" + \
"%s\", error \"%s\"") % (filename, status.message))
io.write(("\nopen(root://%s/%s, flags=OpenFlags.READ, time" + \
"out=90)\nXRootDStatus.code=%d \"%s\"\n") % \
(args.endpoint, filename, status.code, \
status.message.replace("\n", "")))
io.set_status(nap.CRITICAL, ("failed to open test file, " + \
"rc=%d") % status.shellcode)
return
#
status, data = f.read(offset=0, size=65536, timeout=30)
if ( not status.ok ):
io.write(("\n%s\nread(offset=0, size=65536, timeout=30)\n" + \
"XRootDStatus.code=%d \"%s\"\n") % (filename, \
status.code, status.message.replace("\n", "")))
io.set_status(nap.CRITICAL, "failed to read file block 0")
return
#
chksum = hex(zlib.adler32(data) & 0xffffffff)[2:]
if ( chksum != file['adlr'] ):
io.set_status(nap.CRITICAL, ("block 0 checksum mismatch, t" + \
"est file \"%s\", adler32 is \"%s\" should be \"%s\"") % \
(filename, chksum, file['adlr']))
io.write(("\nblock 0 checksum mismatch, test file \"%s\", " + \
"adler32 is \"%s\" should be \"%s\"\n") % \
(filename, chksum, file['adlr']))
io.set_status(nap.CRITICAL, "file block 0 checksum mismatch")
return
#
rndm = random.randint(1, file['blck'] - 1)
#
status, data = f.read(offset=rndm*65536, size=65536, timeout=30)
if ( not status.ok ):
io.set_status(nap.CRITICAL, ("failed to read block %d of t" + \
"est file \"%s\", error \"%s\"") % (rndm, filename, \
status.message))
io.write(("\n%s\nread(offset=%d, size=65536, timeout=30)\n" + \
"XRootDStatus.code=%d \"%s\"\n") % \
(filename, rndm*65536, status.code, \
status.message.replace("\n", "")))
io.set_status(nap.CRITICAL, "failed to read file block %d")
return
#
chk73c = CSXE_ASCII73CODE[ (zlib.adler32(data) & 0xffffffff) % 73 ]
if ( chk73c != file['code'][rndm] ):
io.set_status(nap.CRITICAL, ("block %d checksum mismatch, " + \
"test file \"%s\", code is \"%c\" should be \"%c\"") % \
(rndm, filename, chk73c, file['code'][rndm]))
io.write(("\nblock %d checksum mismatch, test file \"%s\"," + \
" adler32 code-73 is \"%s\" should be \"%s\"\n") % \
(rndm, filename, chk73c, file['code'][rndm]))
io.set_status(nap.CRITICAL, "file block %d checksum mismatch" \
% rndm)
return
#
host = f.get_property('DataServer')
......@@ -965,15 +982,14 @@ def probe_contain(args, io):
if ( status.ok ):
host = ( f.get_property('DataServer') ).split(":",1)[0]
epnt = ( args.endpoint ).split(":",1)[0]
io.write(("\nmanaged to open foreign test file \"/store/test/" + \
"xrootd/%s%s\" from host %s\n") % \
(site, file['name'], host))
cnt = len(epnt) - epnt.find(".")
if ( host[-cnt:] == epnt[-cnt:] ):
io.set_status(nap.WARNING, ("managed to open foreign test" + \
" file \"%s\" from host %s") % ("/store/test/xrootd/" + \
site + file['name'], host))
io.set_status(nap.WARNING, "managed to open foreign test file")
else:
io.set_status(nap.CRITICAL, ("managed to open foreign test" + \
" file \"%s\"") % ("/store/test/xrootd/" + site + \
file['name']))
io.set_status(nap.CRITICAL, "managed to open foreign test file")
return
io.write("\nFile \"%s\" correctly not accessible from site %s\n" % \
("/store/test/xrootd/" + site + file['name'], args.sitename))
......
......@@ -770,7 +770,7 @@ class SRMVOMetrics(probe.MetricGatherer) :
summary = ''
for surl in req['surls']:
summary += 'listing [%s]' % surl
# summary += 'listing [%s]' % surl
self.printd('listing [%s]' % surl)
try:
# FIXME: Set timeout?
......
#!/bin/bash
# Thin ".sing" wrapper: derives the name of the real test from this script's
# own file name (with the ".sing" suffix removed) and hands it to
# CE-cms-singularity-wrapper, which is sourced into this shell.
# Required environment: SAME_SENSOR_HOME (directory that contains tests/).

# File name of this script without its directory part ...
TEST_SCRIPT=${0##*/}
# ... and without the trailing ".sing" (suffix only, not a ".sing" in the middle).
TEST_SCRIPT=${TEST_SCRIPT%.sing}
echo "Will run $TEST_SCRIPT with Singularity if available"
# Quoted so that an install path containing whitespace is passed as one word.
source "${SAME_SENSOR_HOME}/tests/CE-cms-singularity-wrapper" "${TEST_SCRIPT}"
......@@ -274,11 +274,11 @@ else
echo "GFAL2 clients: $gfalver"
stageout=0
fi
if [ $stageout == 1 ] ; then
echo "ERROR: no valid command for remote stageout"
echo "summary: STAGEOUT_CMD_INVALID"
exit $SAME_ERROR
fi
#if [ $stageout == 1 ] ; then
# echo "ERROR: no valid command for remote stageout"
# echo "summary: STAGEOUT_CMD_INVALID"
# exit $SAME_ERROR
#fi
echo
if [ $warn == 1 ] ; then
echo $summary
......
#! /bin/bash
#
# CE-cms-isolation
#
# Isolation test entry point. It replaces this shell process with the
# CE-cms-singularity test, so the outcome of the isolation test is exactly the
# outcome of the Singularity test; no other test is run from here.
# Required environment: SAME_SENSOR_HOME (directory that contains tests/).
exec "${SAME_SENSOR_HOME}/tests/CE-cms-singularity"
......@@ -65,12 +65,22 @@ fi
OSG_SINGULARITY_EXTRA_OPTS="--home ${SAME_SENSOR_HOME}:/srv --bind /cvmfs --contain"
# Various possible mount points to pull into the container:
for VAR in /lfs_roots /cms /hadoop /hdfs /mnt/hadoop /etc/cvmfs/SITECONF; do
for VAR in /lfs_roots /storage /cms /hadoop /hdfs /mnt/hadoop /etc/cvmfs/SITECONF; do
if [ -e "$VAR" ]; then
OSG_SINGULARITY_EXTRA_OPTS="$OSG_SINGULARITY_EXTRA_OPTS --bind $VAR"
fi
done
echo "OSG_SINGULARITY_EXTRA_OPTS = ${OSG_SINGULARITY_EXTRA_OPTS}"
if [ -f "$X509_USER_PROXY" ] ; then
cp -f $X509_USER_PROXY ${SAME_SENSOR_HOME}/proxy.pem
elif [ -f /tmp/x509up_u`id -u` ] ; then
cp -f /tmp/x509up_u`id -u` ${SAME_SENSOR_HOME}/proxy.pem
else
echo "ERRROR: proxy not found!"
echo "summary: PROXY_NOT_FOUND"
exit $SAME_ERROR
fi
export X509_USER_PROXY=/srv/proxy.pem
cd /cvmfs/singularity.opensciencegrid.org
$OSG_SINGULARITY_PATH exec $OSG_SINGULARITY_EXTRA_OPTS \
......
......@@ -13,6 +13,7 @@ import socket
import logging
import optparse
import subprocess
from distutils.spawn import find_executable
import xml.etree.ElementTree as ET
################
......@@ -227,7 +228,7 @@ def main():
# Workaround to suppress voms errors on OSG
os.environ["VOMS_PROXY_INFO_DONT_VERIFY_AC"] = "1"
if 'OSG_GRID' in os.environ:
if 'OSG_GRID' in os.environ and not find_executable('gfal-copy'):
osg_setup = os.path.join(os.environ['OSG_GRID'], 'setup.sh')
if not os.path.isfile(osg_setup):
log.warning("$OSG_GRID is defined (%s), but %s does not exist." % (os.environ['OSG_GRID'], osg_setup))
......@@ -263,6 +264,7 @@ def main():
sw_dir = '/cvmfs/cms.cern.ch'
else:
return print_summary("None of $VO_CMS_SW_DIR, $OSG_APP, $CVMFS, or /cvmfs/cms.cern.ch available", NAG_CRITICAL)
sw_dir = '/cvmfs/cms.cern.ch'
log.info("Using software directory %s" % sw_dir)
sw_setup_script = os.path.join(sw_dir, 'cmsset_default.sh')
......
#!/bin/bash
# Thin ".sing" wrapper: derives the name of the real test from this script's
# own file name (with the ".sing" suffix removed) and hands it to
# CE-cms-singularity-wrapper, which is sourced into this shell.
# Required environment: SAME_SENSOR_HOME (directory that contains tests/).

# File name of this script without its directory part ...
TEST_SCRIPT=${0##*/}
# ... and without the trailing ".sing" (suffix only, not a ".sing" in the middle).
TEST_SCRIPT=${TEST_SCRIPT%.sing}
echo "Will run $TEST_SCRIPT with Singularity if available"
# Quoted so that an install path containing whitespace is passed as one word.
source "${SAME_SENSOR_HOME}/tests/CE-cms-singularity-wrapper" "${TEST_SCRIPT}"
......@@ -12,6 +12,7 @@ import socket
import logging
import optparse
import shutil
from distutils.spawn import find_executable
import xml.etree.ElementTree as ET
......@@ -223,7 +224,7 @@ def main():
# Workaround to suppress voms errors on OSG
os.environ["VOMS_PROXY_INFO_DONT_VERIFY_AC"] = "1"
if 'OSG_GRID' in os.environ:
if 'OSG_GRID' in os.environ and not find_executable('gfal-copy'):
osg_setup = os.path.join(os.environ['OSG_GRID'], 'setup.sh')
if not os.path.isfile(osg_setup):
log.warning("$OSG_GRID is defined (%s), but %s does not exist." % (os.environ['OSG_GRID'], osg_setup))
......@@ -255,9 +256,9 @@ def main():
return print_summary("VOMS proxy has expired.", NAG_CRITICAL)
# Set the CMS environment
if 'VO_CMS_SW_DIR' in os.environ:
if 'VO_CMS_SW_DIR' in os.environ and not find_executable('gfal-copy'):
sw_dir = os.environ['VO_CMS_SW_DIR']
elif 'OSG_APP' in os.environ:
elif 'OSG_APP' in os.environ and not find_executable('gfal-copy'):
sw_dir = os.path.join(os.environ['OSG_APP'], 'cmssoft', 'cms')
if not os.path.exists(sw_dir):
sw_dir = os.environ['OSG_APP']
......@@ -267,6 +268,7 @@ def main():
sw_dir = '/cvmfs/cms.cern.ch'
else:
return print_summary("None of $VO_CMS_SW_DIR, $OSG_APP, $CVMFS, or /cvmfs/cms.cern.ch available", NAG_CRITICAL)
sw_dir = '/cvmfs/cms.cern.ch'
log.info("Using software directory %s" % sw_dir)
sw_setup_script = os.path.join(sw_dir, 'cmsset_default.sh')
......
#!/bin/bash
# Thin ".sing" wrapper: derives the name of the real test from this script's
# own file name (with the ".sing" suffix removed) and hands it to
# CE-cms-singularity-wrapper, which is sourced into this shell.
# Required environment: SAME_SENSOR_HOME (directory that contains tests/).

# File name of this script without its directory part ...
TEST_SCRIPT=${0##*/}
# ... and without the trailing ".sing" (suffix only, not a ".sing" in the middle).
TEST_SCRIPT=${TEST_SCRIPT%.sing}
echo "Will run $TEST_SCRIPT with Singularity if available"
# Quoted so that an install path containing whitespace is passed as one word.
source "${SAME_SENSOR_HOME}/tests/CE-cms-singularity-wrapper" "${TEST_SCRIPT}"
......@@ -15,15 +15,13 @@ FLAVOR_MAP = {'CREAM-CE': 'cream',
'GLOBUS': 'gt',
'OSG-CE': 'gt'}
CE_STATE_METRICS = (
'org.sam.CONDOR-JobState-/cms/Role=lcgadmin',
'org.sam.CONDOR-JobState-/cms/Role=pilot')
CE_STATE_METRICS = [
'org.sam.CONDOR-JobState-/cms/Role=lcgadmin']
CE_METRICS = (
'org.sam.CONDOR-JobSubmit-/cms/Role=lcgadmin',
'org.sam.CONDOR-JobSubmit-/cms/Role=pilot',
'org.cms.WN-analysis-/cms/Role=lcgadmin',
'org.cms.WN-isolation-/cms/Role=pilot',
'org.cms.WN-isolation-/cms/Role=lcgadmin',
'org.cms.WN-basic-/cms/Role=lcgadmin',
'org.cms.WN-cvmfs-/cms/Role=lcgadmin',
'org.cms.WN-env-/cms/Role=lcgadmin',
......
......@@ -62,7 +62,7 @@ define service {
define service {
use sam-generic-wn-active
service_description org.cms.WN-frontier-<VOMS>
check_command samtest-run-sensor!CE-cms-frontier
check_command samtest-run-sensor!CE-cms-frontier.sing
}
define service {
use sam-generic-wn-active
......@@ -72,17 +72,17 @@ define service {
define service {
use sam-generic-wn-active
service_description org.cms.WN-analysis-<VOMS>
check_command samtest-run-sensor!CE-cms-analysis
check_command samtest-run-sensor!CE-cms-analysis.sing
}
define service {
use sam-generic-wn-active
service_description org.cms.WN-xrootd-access-<VOMS>
check_command samtest-run-sensor!CE-cms-xrootd-access
check_command samtest-run-sensor!CE-cms-xrootd-access.sing
}
define service {
use sam-generic-wn-active
service_description org.cms.WN-xrootd-fallback-<VOMS>
check_command samtest-run-sensor!CE-cms-xrootd-fallback
check_command samtest-run-sensor!CE-cms-xrootd-fallback.sing
}
define service {
use sam-generic-wn-active
......@@ -92,7 +92,7 @@ define service {
define service {
use sam-generic-wn-active
service_description org.cms.WN-remotestageout-<VOMS>
check_command samtest-run-sensor!CE-cms-remotestageout
check_command samtest-run-sensor!CE-cms-remotestageout.sing
}
define service {
use sam-generic-wn-active
......@@ -104,3 +104,8 @@ define service {
service_description org.cms.WN-mc-<VOMS>
check_command samtest-run-sensor!CE-cms-mc.sing
}
define service {
use sam-generic-wn-active
service_description org.cms.WN-isolation-<VOMS>
check_command samtest-run-sensor!CE-cms-isolation
}
......@@ -4,7 +4,7 @@
Summary: WLCG Compliant Probes from %{site}
Name: nagios-plugins-wlcg-org.cms
Version: 1.1.51
Version: 1.1.52
Release: 1%{?dist}
License: GPL
......@@ -52,12 +52,22 @@ install --directory %{buildroot}/etc/cron.d
/etc/cron.d/cms_glexec
%changelog
* Fri Jul 20 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.52-1.
- moved isolation test to lcgadmin
- disabled pilot role submission
- shorter summary for xrootd test
- removed surl printout in summary of SE tests
- added /storage to the paths to mount in Singularity
- copy proxy inside Singularity
- tweak to library search in xrootd tests
- moved several tests inside Singularity
* Fri May 4 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.51-1.
- added workaround in etf plugin for xrootd tag
* Thu May 3 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.50-1.
- added xrootd probe
- new etf plugin to remove usage of BDII
- made test file name for mc test more random
- removed gfal-copy check from CE-cms-env
* Wed Apr 18 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.49-1.
- glexec test eliminated from isolation test
- removed double binds in singularity
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment