Commit c89c9f57 authored by Marian Babik
Browse files

Merge branch 'qa' of ssh://gitlab.cern.ch:7999/etf/cmssam into qa

parents 5785623f 0836a21f
......@@ -164,7 +164,7 @@ let localConfigFileAgeInHours=$seconds/3600
echo "SiteLocalConfigAge: ${localConfigFileAgeInHours} hours"
asCvs=1
cvsUrl="https://gitlab.cern.ch/SITECONF/${SiteName}/raw/master/JobConfig/site-local-config${subSiteName}.xml"
cvsUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/files/JobConfig%2Fsite-local-config${subSiteName}.xml/raw?ref=master"
ConfigFileFromCVS=`mktemp /tmp/site-local-config-from-CVS.xml.XXX`
echo "GITCopy: $cvsUrl"
./fetch-from-web-gitlab \"$cvsUrl\" $ConfigFileFromCVS
......@@ -174,11 +174,14 @@ if [ $rc == 0 ] ; then
if [ $? == 1 ] ; then asCvs=0; fi
if [ $asCvs == 0 ] ; then
noConfigDump=1
cvsMarkupUrl="https://gitlab.cern.ch/SITECONF/${SiteName}/blob/master/JobConfig/site-local-config${subSiteName}.xml"
ConfigFileMarkup=`mktemp /tmp/site-local-config-from-CVS-Markup.xml.XXX`
./fetch-from-web-gitlab \"$cvsMarkupUrl\" $ConfigFileMarkup
cvsFileDate=`/bin/awk '{i=index($0,"datetime=");if(i>0){s=substr($0,i+10,4) "-" substr($0,i+15,2) "-" substr($0,i+18,2) " " substr($0,i+21,2) ":" substr($0,i+24,2) ":" substr($0,i+27,2) "Z";print s}}' $ConfigFileMarkup`
let seconds=`date +%s`-`date +%s -d "$cvsFileDate"`
FileInfoUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/files/JobConfig%2Fsite-local-config${subSiteName}.xml?ref=master"
FileInfoTmp=`mktemp /tmp/site-local-config-from-CVS-Markup.xml.XXX`
./fetch-from-web-gitlab \"$FileInfoUrl\" $FileInfoTmp
last_commit=`jq -r .last_commit_id $FileInfoTmp`
CommitInfoUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/commits/${last_commit}"
./fetch-from-web-gitlab \"$CommitInfoUrl\" $FileInfoTmp
FileDate=`jq -r .authored_date $FileInfoTmp`
let seconds=`date +%s`-`date +%s -d "$FileDate"`
let cvsFileAgeInHours=$seconds/3600
echo "GITConfigFileAge: ${cvsFileAgeInHours} hours"
if [ $localConfigFileAgeInHours -gt 120 ] && [ $cvsFileAgeInHours -gt 120 ] ; then
......@@ -210,7 +213,7 @@ echo "LocalTFCAge: ${localTFCfileAgeInHours} hours"
asCvs=1
TFCfileName=`echo $TFCPath | awk -F'/' '{print $NF}'`
cvsUrl="https://gitlab.cern.ch/SITECONF/${SiteName}/raw/master/PhEDEx/${TFCfileName}"
cvsUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/files/PhEDEx%2F${TFCfileName}/raw?ref=master"
TfcFileFromCVS=`mktemp /tmp/storage-from-CVS.xml.XXXX`
echo "TFCGITCopy: $cvsUrl"
./fetch-from-web-gitlab \"$cvsUrl\" $TfcFileFromCVS
......@@ -221,12 +224,15 @@ if [ $rc == 0 ] ; then
if [ $? == 1 ] ; then asCvs=0; fi
if [ $asCvs == 0 ] ; then
noTFCDump=1
cvsMarkupUrl="https://gitlab.cern.ch/SITECONF/${SiteName}/blob/master/PhEDEx/${TFCfileName}"
TfcFileMarkup=`mktemp /tmp/TFC-from-CVS-Markup.xml.XXX`
FileInfoUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/files/PhEDEx%2F${TFCfileName}?ref=master"
FileInfoTmp=`mktemp /tmp/TFC-from-CVS-Markup.xml.XXX`
echo "Fetch GIT Markup copy of TFC from $cvsMarkupUrl"
./fetch-from-web-gitlab \"$cvsMarkupUrl\" $TfcFileMarkup
cvsFileDate=`/bin/awk '{i=index($0,"datetime=");if(i>0){s=substr($0,i+10,4) "-" substr($0,i+15,2) "-" substr($0,i+18,2) " " substr($0,i+21,2) ":" substr($0,i+24,2) ":" substr($0,i+27,2) "Z";print s}}' $TfcFileMarkup`
let seconds=`date +%s`-`date +%s -d "$cvsFileDate"`
./fetch-from-web-gitlab \"$FileInfoUrl\" $FileInfoTmp
last_commit=`jq -r .last_commit_id $FileInfoTmp`
CommitInfoUrl="https://gitlab.cern.ch/api/v4/projects/siteconf%2F${SiteName}/repository/commits/${last_commit}"
./fetch-from-web-gitlab \"$CommitInfoUrl\" $FileInfoTmp
FileDate=`jq -r .authored_date $FileInfoTmp`
let seconds=`date +%s`-`date +%s -d "$FileDate"`
let cvsFileAgeInHours=$seconds/3600
echo "GITTFCAge: ${cvsFileAgeInHours} hours"
if [ $localTFCfileAgeInHours -gt 120 ] && [ $cvsFileAgeInHours -gt 120 ] ; then
......
......@@ -38,14 +38,14 @@ function check_df {
#######################################
# Print the remaining disk quota on the filesystem that holds a directory.
#
# The filesystem name is taken from the first column of the second line of
# `df -kP`. The matching row of `quota` output is reduced to
# "<filesystem> <column3 - column2>" (or "<column2 - column1>" of the
# following line when the filesystem name sits alone on its own line), and
# that difference is divided by 2000 using integer arithmetic.
# NOTE(review): the difference is presumably "soft quota minus used blocks"
# and the divisor presumably converts it to GB - confirm against the quota
# output format in use.
#
# Globals:   dir, fs, myquotastr, myquota, quotagb are written. They are
#            deliberately left global, as before, in case callers read them.
# Arguments: $1 - directory whose filesystem is looked up
# Outputs:   exactly one line on stdout: the computed value, or -1 when the
#            filesystem cannot be determined or has no quota row
# Returns:   always 0
#######################################
function check_quota {
    dir=$1
    fs=$(df -kP "$dir" | awk '{if (NR==2) print $1}')

    # Without a filesystem name there is nothing to match against; an empty
    # grep pattern would match every quota row and report a bogus value.
    if [ -z "$fs" ]; then
        echo -1
        return 0
    fi

    # quota can block indefinitely (e.g. on a stale network mount), so it is
    # run exactly once and always under a 15 second limit. The filesystem
    # name is matched as a fixed string, not as a regular expression.
    # The status tested by `if` is that of the final grep.
    if myquotastr=$(timeout 15 quota 2>/dev/null \
        | awk '{if (NR>2) {if (NF==1) {n=$1; getline; print n " " $2-$1} else {print $1 " " $3-$2}}}' \
        | grep -F -- "$fs"); then
        # check only if there are any quotas, else ignore
        # Only the first matching row is used.
        myquota=$(awk 'NR==1 {print $2}' <<<"$myquotastr")
        quotagb=$(( myquota / (2 * 1000) ))
        echo "$quotagb"
    else
        echo -1
    fi
    return 0
}
......@@ -109,7 +109,6 @@ rm -rf $local_proxy_dir
type -t voms-proxy-info > /dev/null
result=$?
if [ $result -eq 0 ] ; then
isvoms=1
echo -n "UserDN: "
voms-proxy-info -identity
l=`voms-proxy-info -timeleft`
......@@ -117,13 +116,12 @@ if [ $result -eq 0 ] ; then
fqan=`voms-proxy-info -fqan`
echo "FQAN:"
echo "$fqan"
if [ $l -lt 21600 ] ; then
echo "WARNING: proxy shorther than 6 hours"
fi
else
isvoms=0
echo "WARNING: voms-proxy-info not found"
fi
if [ $isvoms -eq 1 -a $l -lt 21600 ] ; then
echo "WARNING: proxy shorther than 6 hours"
fi
# Test of the local worker node environment
echo
......
#!/bin/bash
TEST_SCRIPT=`basename $0 | sed 's/\.sing//'`
echo "Will run $TEST_SCRIPT with Singularity if available"
source ${SAME_SENSOR_HOME}/tests/CE-cms-singularity-wrapper ${TEST_SCRIPT}
......@@ -27,9 +27,9 @@ CSWNFB_FILES = [ "/store/mc/SAM/GenericTTbar/AODSIM/" + \
"/store/mc/SAM/GenericTTbar/AODSIM/" + \
"CMSSW_9_2_6_91X_mcRun1_realistic_v2-v1/00000/" + \
"CE860B10-5D76-E711-BCA8-FA163EAA761A.root" ]
CSWNFB_SITES = ["T1_FR_CCIN2P3", "T1_RU_JINR", "T2_CN_Beijing", "T2_BE_IIHE", \
"T2_FR_GRIF_LLR", "T2_HU_Budapest", "T2_UK_London_Brunel", \
"T2_UK_London_IC", "T2_US_Nebraska", "T2_US_Wisconsin"]
CSWNFB_SITES = ["T1_FR_CCIN2P3", "T2_US_Nebraska", "T1_RU_JINR", "T2_UK_London_Brunel", \
"T2_CN_Beijing", "T2_BE_IIHE", "T2_FR_GRIF_LLR", "T2_HU_Budapest", \
"T2_UK_London_IC", "T2_US_Wisconsin"]
......@@ -47,19 +47,24 @@ process.SiteLocalConfigService = cms.Service("SiteLocalConfigService",
overrideSourceCacheHintDir = cms.untracked.string("application-only"),
)
process.dump = cms.EDAnalyzer("EventContentAnalyzer", listContent=cms.untracked.bool(False), getData=cms.untracked.bool(True))
process.dump = cms.EDAnalyzer("EventContentAnalyzer",
listContent=cms.untracked.bool(False),
verboseForModuleLabels = cms.untracked.vstring("recoTracks_generalTracks"),
getDataForModuleLabels=cms.untracked.vstring("recoTracks_generalTracks"),
getData=cms.untracked.bool(True),
)
process.load("FWCore.MessageService.MessageLogger_cfi")
process.MessageLogger.cerr.FwkReport.reportEvery = 1
process.maxEvents = cms.untracked.PSet(
input = cms.untracked.int32(10)
input = cms.untracked.int32(1)
)
process.p = cms.EndPath(process.dump)
"""
def configure_logging(lvl=logging.INFO):
logger = logging.getLogger("cms.CE.xrootd-access")
logger = logging.getLogger("cms.CE.xrootd-fallback")
logger.setLevel(lvl)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt="[%(process)d] %(asctime)s [%(levelname)07s]: %(message)s")
......@@ -73,14 +78,14 @@ def print_summary(summary, retval):
log.info(summary)
else:
log.error(summary)
print "summary: %s" % summary
print "Summary: %s" % summary
return retval
def parse_opts():
parser = optparse.OptionParser()
parser.add_option("-v", "--verbose", dest="verbose", help="Increase logging verbosity", action="store_true", default=False)
parser.add_option("-H", "--host", dest="hostname", help="Hostname to use")
parser.add_option("-t", "--timeout", dest="timeout", help="Test timeout in seconds; default is 240", default=240, type="int")
parser.add_option("-t", "--timeout", dest="timeout", help="Test timeout in seconds; default is 300", default=300, type="int")
opts, args = parser.parse_args()
......@@ -108,20 +113,23 @@ def runCommandChild(cmd, args):
finally:
os._exit(127)
def runCommandParent(r, pid, opts):
def runCommandParent(r, pid, opts, cmsruntimeout):
flags = fcntl.fcntl(r, fcntl.F_GETFL)
flags |= os.O_NONBLOCK
fcntl.fcntl(r, fcntl.F_SETFL, flags)
xlist = []
rlist = [r]
wlist = []
timeout = opts.endtime - time.time()
if cmsruntimeout > 0:
endtime = min(opts.endtime, time.time() + cmsruntimeout)
else:
endtime = opts.endtime
timeout = endtime - time.time()
stdout = ""
exitCode = -1
while (timeout >= 0) and (r not in xlist):
rlist, wlist, xlist = select.select(rlist, wlist, xlist, timeout)
timeout = opts.endtime - time.time()
timeout = endtime - time.time()
if r in rlist:
newstr = os.read(r, 1024)
stdout += newstr
......@@ -141,18 +149,18 @@ def runCommandParent(r, pid, opts):
exitCode = -1
if (timeout < 0) and (exitCode < 0):
os.kill(pid, signal.SIGKILL)
print "Killed CMSSW child (pid %d) due to timeout." % pid
log.error("Killed CMSSW child (pid %d) due to timeout." % pid)
if exitCode < 0:
pid, exitCode = os.waitpid(pid, 0)
return stdout, exitCode
def runCommand(cmd, args, opts, combineStd=False):
def runCommand(cmd, args, opts, cmsruntimeout=0, combineStd=False):
r, w = os.pipe()
try:
pid = os.fork()
if pid: # parent
os.close(w)
return runCommandParent(r, pid, opts)
return runCommandParent(r, pid, opts, cmsruntimeout)
else:
os.close(r)
os.dup2(w, 1)
......@@ -340,9 +348,7 @@ def main():
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
log.info("Fallback site: %s" % CSWNFB_SITES[rndm_site])
xrootd_file = "/store/test/xrootd/" + CSWNFB_SITES[rndm_site] + CSWNFB_FILES[rndm_file]
log.info("Xrootd file we will test: %s" % xrootd_file)
log.info("Xrootd file we will test: %s" % CSWNFB_FILES[rndm_file])
shutil.rmtree("xrootd-fallback", 1)
try:
......@@ -366,20 +372,29 @@ def main():
if opts.verbose:
level = 2
fd = open("test_xrootd.py", "w")
fd.write(cms_file % (xrootd_file, level))
fd.close()
exitCode = evalCommand("scramv1", ["runtime", "-sh"], opts)
if exitCode:
return print_summary("Failure when setting up scramv1 runtime (CMSSW %s, SCRAM_ARCH=%s)" % (cmssw_version, scram_arch), NAG_CRITICAL)
no_trial = 0
while ( no_trial < 3 ):
stdout, exitCode = runCommand("cmsRun", ["test_xrootd.py"], opts, combineStd=True)
if (opts.endtime - time.time()) < 60:
log.error("Timed out before reaching 3 attempts limit")
exitCode = 8015
break
xrootd_file = "/store/test/xrootd/" + CSWNFB_SITES[rndm_site] + CSWNFB_FILES[rndm_file]
log.info("Xrootd fullpath: %s" % xrootd_file)
fd = open("test_xrootd.py", "w")
fd.write(cms_file % (xrootd_file, level))
fd.close()
stdout, exitCode = runCommand("cmsRun", ["test_xrootd.py"], opts, 150, combineStd=True)
no_trial += 1
for line in stdout.split('\n'):
if re.search('opened', line) or re.search('redirect', line):
if re.search('opened', line) or re.search('redirect', line) or re.search('Reading', line) or re.search('server', line):
print line
maxlen = 12*1024
......@@ -392,18 +407,25 @@ def main():
if ( exitCode == 0 ):
break
log.error("Failed cmsRun. Output:")
if (opts.endtime - time.time()) < 60:
log.error("Not enough time left for another try")
break
log.error("Failed cmsRun output:")
print stdout
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
if ( CSWNFB_SITES[rndm_site] == siteName ):
rndm_site = (rndm_site + 1) % len(CSWNFB_SITES)
log.info("retrying with fallback site: %s" % CSWNFB_SITES[rndm_site])
log.info("Retrying with fallback site: %s" % CSWNFB_SITES[rndm_site])
if exitCode:
returnCode = NAG_CRITICAL
if numCatalogs > 1: # Fallback correctly configured, so only WARN
if exitCode == 8015:
returnCode = NAG_WARNING
return print_summary("Test reached timeout before the third attempt; exit code %s" % exitCode, returnCode)
returnCode = NAG_CRITICAL
return print_summary("Failed cmsRun; exit code %d" % exitCode, returnCode)
log.info("Successful cmsRun.")
# Return the correct exit code.
......
......@@ -49,7 +49,7 @@ if [ $useSquid == 1 ] ; then
fi
wgetOutput=`mktemp`
cmd="wget --header=\"PRIVATE-TOKEN: 6STzvBkTLS7fqevzuehi\" -O $file $url"
cmd="curl --request GET --header \"PRIVATE-TOKEN: kuD9s3WrDGNXZ8BvxFV2\" $url -o $file"
cmdFile=`mktemp`
echo "$cmd > $wgetOutput 2>&1" > $cmdFile
......
......@@ -4,7 +4,7 @@
Summary: WLCG Compliant Probes from %{site}
Name: nagios-plugins-wlcg-org.cms
Version: 1.1.54
Version: 1.1.56
Release: 1%{?dist}
License: GPL
......@@ -52,7 +52,11 @@ install --directory %{buildroot}/etc/cron.d
/etc/cron.d/cms_glexec
%changelog
* Thu Dec 6 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.54-1.
* Fri Feb 22 2019 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.56-1.
- Improved timeout logic in xrootd-fallback
* Fri Feb 1 2019 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.55-1.
- Made xrootd fallback test critical
* Thu Dec 6 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.54-1.
- fixes in xrootd-fallback and moved CMSSW to CMSSW_9_2_6 in tests
* Tue Nov 6 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.53-1.
- fixes in xrootd-fallback, remotestageout, mc and SRM tests
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment