Commit a605e7ea authored by Andrea Sciaba
Browse files

fixes in xrootd-fallback, remotestageout, mc and SRM tests

parent 3eb359e4
......@@ -89,7 +89,7 @@ if [ $EXIT_STATUS -ne 0 -a $EXIT_STATUS -ne 4 ] ; then
exit $SAME_ERROR
elif [ $EXIT_STATUS -eq 4 ]; then
echo "WARNING: Stage Out Test failed to cleanup the test file: $EXIT_STATUS"
exit "summary: CLEANUP_FAILED"
echo "summary: CLEANUP_FAILED"
exit $SAME_WARNING
else
echo "Stage Out Test successful"
......
......@@ -77,6 +77,8 @@ LCG_UTIL_TIMEOUT_CONNECT = 10
LCG_UTIL_TIMEOUT_SENDRECEIVE = 120
LCG_UTIL_TIMEOUT_SRM = 180
gfal2.set_verbose(gfal2.verbose_level.debug)
class SRMVOMetrics(probe.MetricGatherer) :
"""A Metric Gatherer specific for SRM."""
......@@ -662,7 +664,11 @@ class SRMVOMetrics(probe.MetricGatherer) :
def metricVOPut(self):
"Copy a local file to the SRM into space area(s) defined by VO."
def event_callback(event):
self.printd("[%s] %s %s %s" % (event.timestamp, event.domain, event.stage, event.description))
self.printd(self.gfal2_ver)
# generate source file
......@@ -684,6 +690,7 @@ class SRMVOMetrics(probe.MetricGatherer) :
params = ctx.transfer_parameters()
params.create_parent = True
params.timeout = self._timeouts['srm_connect']
params.event_callback = event_callback
if self._voInfoDictionary[srmendpt].get('space_token', None):
params.dst_spacetoken = self._voInfoDictionary[srmendpt]['space_token']
......@@ -860,6 +867,9 @@ class SRMVOMetrics(probe.MetricGatherer) :
def metricVOGet(self):
"Copy given remote file(s) from SRM to a local file."
def event_callback(event):
self.printd("[%s] %s %s %s" % (event.timestamp, event.domain, event.stage, event.description))
self.printd(self.gfal2_ver)
# Instantiate gfal2
......@@ -879,6 +889,7 @@ class SRMVOMetrics(probe.MetricGatherer) :
# Set transfer parameters
params = ctx.transfer_parameters()
params.timeout = self._timeouts['srm_connect']
params.event_callback = event_callback
if self._voInfoDictionary[srmendpt].get('space_token_get', None):
params.src_spacetoken = self._voInfoDictionary[srmendpt]['space_token_get']
params.overwrite = True
......
......@@ -14,6 +14,22 @@ import optparse
import shutil
from distutils.spawn import find_executable
import xml.etree.ElementTree as ET
# ########################################################################### #
CSWNFB_FILES = [ "/store/mc/SAM/GenericTTbar/AODSIM/" + \
"CMSSW_9_2_6_91X_mcRun1_realistic_v2-v1/00000/" + \
"A64CCCF2-5C76-E711-B359-0CC47A78A3F8.root",
"/store/mc/SAM/GenericTTbar/AODSIM/" + \
"CMSSW_9_2_6_91X_mcRun1_realistic_v2-v1/00000/" + \
"AE237916-5D76-E711-A48C-FA163EEEBFED.root",
"/store/mc/SAM/GenericTTbar/AODSIM/" + \
"CMSSW_9_2_6_91X_mcRun1_realistic_v2-v1/00000/" + \
"CE860B10-5D76-E711-BCA8-FA163EAA761A.root" ]
CSWNFB_SITES = ["T1_FR_CCIN2P3", "T1_RU_JINR", "T2_CN_Beijing", "T2_BE_IIHE", \
"T2_FR_GRIF_LLR", "T2_HU_Budapest", "T2_UK_London_Brunel", \
"T2_UK_London_IC", "T2_US_Nebraska", "T2_US_Wisconsin"]
log = logging.getLogger()
......@@ -318,8 +334,16 @@ def main():
# Setup CMSSW, run command
cmssw_file="/store/mc/SAM/GenericTTbar/AODSIM/CMSSW_9_0_0_90X_mcRun1_realistic_v4-v1/10000/28B9D1FB-8B31-E711-AA4E-0025905B85B2.root"
xrootd_file="/store/test/xrootd/CMSSAM/%s" % cmssw_file
rndm_file = random.randint(0, len(CSWNFB_FILES) - 1)
log.info("Fallback file: %s" % CSWNFB_FILES[rndm_file])
#
rndm_site = random.randint(0, len(CSWNFB_SITES) - 1)
if ( CSWNFB_SITES[rndm_site] == siteName ):
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
log.info("Fallback site: %s" % CSWNFB_SITES[rndm_site])
xrootd_file = "/store/test/xrootd/" + CSWNFB_SITES[rndm_site] + CSWNFB_FILES[rndm_file]
log.info("Xrootd file we will test: %s" % xrootd_file)
shutil.rmtree("xrootd-fallback", 1)
......@@ -352,20 +376,33 @@ def main():
if exitCode:
return print_summary("Failure when setting up scramv1 runtime (CMSSW %s, SCRAM_ARCH=%s)" % (cmssw_version, scram_arch), NAG_CRITICAL)
stdout, exitCode = runCommand("cmsRun", ["test_xrootd.py"], opts, combineStd=True)
for line in stdout.split('\n'):
if re.search('opened', line) or re.search('redirect', line):
print line
no_trial = 0
while ( no_trial < 3 ):
stdout, exitCode = runCommand("cmsRun", ["test_xrootd.py"], opts, combineStd=True)
no_trial += 1
for line in stdout.split('\n'):
if re.search('opened', line) or re.search('redirect', line):
print line
maxlen = 12*1024
if opts.verbose:
maxlen = 50*1024
maxlen = 12*1024
if opts.verbose:
maxlen = 50*1024
if len(stdout) > maxlen:
stdout = "cmsRun output truncated - only last %d KB shown:\n" % (maxlen/1024) + stdout[-(maxlen):]
if ( exitCode == 0 ):
break
if len(stdout) > maxlen:
stdout = "cmsRun output truncated - only last %d KB shown:\n" % (maxlen/1024) + stdout[-(maxlen):]
if exitCode:
log.error("Failed cmsRun. Output:")
print stdout
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
if ( CSWNFB_SITES[rndm_site] == siteName ):
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
log.info("retrying with fallback site: %s" % CSWNFB_SITES[rndm_site])
if exitCode:
log.error("Failed cmsRun. Output:")
returnCode = NAG_CRITICAL
if numCatalogs > 1: # Fallback correctly configured, so only WARN
returnCode = NAG_WARNING
......@@ -373,13 +410,13 @@ def main():
log.info("Successful cmsRun.")
# Return the correct exit code.
if exitcode != NAG_OK:
if exitCode != NAG_OK:
log.info("Output:")
print stdout
log.warning("Execution contains warnings")
else:
log.info("Success!")
return exitcode
return exitCode
if __name__ == '__main__':
sys.exit(main())
......
......@@ -40,7 +40,7 @@ class StageOutDiagnostic:
self.options = None
self.datestamp = time.asctime(time.localtime(time.time()))
self.datestamp = self.datestamp.replace(" ", "-").replace(":", "_")
self.testLFN = "/store/user/sam/SAM/RemoteStageOutTest-%i-%s" % (os.getpid(), self.datestamp)
self.testLFN = "/store/user/sam/SAM/RemoteStageOutTest-%s-%s" % (os.popen("hostid").read().strip(), self.datestamp)
self.testLFNprefixes = [ "gsiftp://eoscmsftp.cern.ch//eos/cms", \
"srm://stormfe1.pi.infn.it:8444/srm/managerv2?SFN=/cms", \
"gsiftp://red-gridftp.unl.edu//mnt/hadoop/user/uscms01/pnfs/unl.edu/data4/cms" ]
......
......@@ -4,7 +4,7 @@
Summary: WLCG Compliant Probes from %{site}
Name: nagios-plugins-wlcg-org.cms
Version: 1.1.52
Version: 1.1.53
Release: 1%{?dist}
License: GPL
......@@ -52,6 +52,8 @@ install --directory %{buildroot}/etc/cron.d
/etc/cron.d/cms_glexec
%changelog
* Tue Nov 6 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.53-1.
- fixes in xrootd-fallback, remotestageout, mc and SRM tests
* Fri Jul 20 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.52-1.
- moved isolation test to lcgadmin
- disabled pilot role submission
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment