Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
etf
cmssam
Commits
7bfde71f
Commit
7bfde71f
authored
Feb 22, 2019
by
Andrea Sciaba
Browse files
Merge branch 'qa' into 'qa'
Timeout/logic improvements for xrootd-fallback test See merge request
!5
parents
5de0ffd0
0f6405e2
Changes
2
Hide whitespace changes
Inline
Side-by-side
SiteTests/testjob/tests/CE-cms-xrootd-fallback
View file @
7bfde71f
...
...
@@ -27,9 +27,9 @@ CSWNFB_FILES = [ "/store/mc/SAM/GenericTTbar/AODSIM/" + \
"/store/mc/SAM/GenericTTbar/AODSIM/"
+
\
"CMSSW_9_2_6_91X_mcRun1_realistic_v2-v1/00000/"
+
\
"CE860B10-5D76-E711-BCA8-FA163EAA761A.root"
]
CSWNFB_SITES
=
[
"T1_FR_CCIN2P3"
,
"T
1_RU_JINR"
,
"T2_CN_Beijing"
,
"T2_BE_IIHE
"
,
\
"T2_FR_GRIF_LLR"
,
"T2_HU_Budapest"
,
"T2_UK_London_Brunel"
,
\
"T2_UK_London_IC"
,
"T2_US_Nebraska"
,
"T2_US_Wisconsin"
]
CSWNFB_SITES
=
[
"T1_FR_CCIN2P3"
,
"T
2_US_Nebraska"
,
"T1_RU_JINR"
,
"T2_UK_London_Brunel
"
,
\
"T2_CN_Beijing"
,
"T2_BE_IIHE"
,
"T2_FR_GRIF_LLR"
,
"T2_HU_Budapest"
,
\
"T2_UK_London_IC"
,
"T2_US_Wisconsin"
]
...
...
@@ -47,19 +47,24 @@ process.SiteLocalConfigService = cms.Service("SiteLocalConfigService",
overrideSourceCacheHintDir = cms.untracked.string("application-only"),
)
process.dump = cms.EDAnalyzer("EventContentAnalyzer", listContent=cms.untracked.bool(False), getData=cms.untracked.bool(True))
process.dump = cms.EDAnalyzer("EventContentAnalyzer",
listContent=cms.untracked.bool(False),
verboseForModuleLabels = cms.untracked.vstring("recoTracks_generalTracks"),
getDataForModuleLabels=cms.untracked.vstring("recoTracks_generalTracks"),
getData=cms.untracked.bool(True),
)
process.load("FWCore.MessageService.MessageLogger_cfi")
process.MessageLogger.cerr.FwkReport.reportEvery = 1
process.maxEvents = cms.untracked.PSet(
input = cms.untracked.int32(1
0
)
input = cms.untracked.int32(1)
)
process.p = cms.EndPath(process.dump)
"""
def
configure_logging
(
lvl
=
logging
.
INFO
):
logger
=
logging
.
getLogger
(
"cms.CE.xrootd-
access
"
)
logger
=
logging
.
getLogger
(
"cms.CE.xrootd-
fallback
"
)
logger
.
setLevel
(
lvl
)
handler
=
logging
.
StreamHandler
(
sys
.
stdout
)
formatter
=
logging
.
Formatter
(
fmt
=
"[%(process)d] %(asctime)s [%(levelname)07s]: %(message)s"
)
...
...
@@ -73,14 +78,14 @@ def print_summary(summary, retval):
log
.
info
(
summary
)
else
:
log
.
error
(
summary
)
print
"
s
ummary: %s"
%
summary
print
"
S
ummary: %s"
%
summary
return
retval
def
parse_opts
():
parser
=
optparse
.
OptionParser
()
parser
.
add_option
(
"-v"
,
"--verbose"
,
dest
=
"verbose"
,
help
=
"Increase logging verbosity"
,
action
=
"store_true"
,
default
=
False
)
parser
.
add_option
(
"-H"
,
"--host"
,
dest
=
"hostname"
,
help
=
"Hostname to use"
)
parser
.
add_option
(
"-t"
,
"--timeout"
,
dest
=
"timeout"
,
help
=
"Test timeout in seconds; default is
24
0"
,
default
=
24
0
,
type
=
"int"
)
parser
.
add_option
(
"-t"
,
"--timeout"
,
dest
=
"timeout"
,
help
=
"Test timeout in seconds; default is
30
0"
,
default
=
30
0
,
type
=
"int"
)
opts
,
args
=
parser
.
parse_args
()
...
...
@@ -108,20 +113,23 @@ def runCommandChild(cmd, args):
finally
:
os
.
_exit
(
127
)
def
runCommandParent
(
r
,
pid
,
opts
):
def
runCommandParent
(
r
,
pid
,
opts
,
cmsruntimeout
):
flags
=
fcntl
.
fcntl
(
r
,
fcntl
.
F_GETFL
)
flags
|=
os
.
O_NONBLOCK
fcntl
.
fcntl
(
r
,
fcntl
.
F_SETFL
,
flags
)
xlist
=
[]
rlist
=
[
r
]
wlist
=
[]
timeout
=
opts
.
endtime
-
time
.
time
()
if
cmsruntimeout
>
0
:
endtime
=
min
(
opts
.
endtime
,
time
.
time
()
+
cmsruntimeout
)
else
:
endtime
=
opts
.
endtime
timeout
=
endtime
-
time
.
time
()
stdout
=
""
exitCode
=
-
1
while
(
timeout
>=
0
)
and
(
r
not
in
xlist
):
rlist
,
wlist
,
xlist
=
select
.
select
(
rlist
,
wlist
,
xlist
,
timeout
)
timeout
=
opts
.
endtime
-
time
.
time
()
timeout
=
endtime
-
time
.
time
()
if
r
in
rlist
:
newstr
=
os
.
read
(
r
,
1024
)
stdout
+=
newstr
...
...
@@ -141,18 +149,18 @@ def runCommandParent(r, pid, opts):
exitCode
=
-
1
if
(
timeout
<
0
)
and
(
exitCode
<
0
):
os
.
kill
(
pid
,
signal
.
SIGKILL
)
print
"Killed CMSSW child (pid %d) due to timeout."
%
pid
log
.
error
(
"Killed CMSSW child (pid %d) due to timeout."
%
pid
)
if
exitCode
<
0
:
pid
,
exitCode
=
os
.
waitpid
(
pid
,
0
)
return
stdout
,
exitCode
def
runCommand
(
cmd
,
args
,
opts
,
combineStd
=
False
):
def
runCommand
(
cmd
,
args
,
opts
,
cmsruntimeout
=
0
,
combineStd
=
False
):
r
,
w
=
os
.
pipe
()
try
:
pid
=
os
.
fork
()
if
pid
:
# parent
os
.
close
(
w
)
return
runCommandParent
(
r
,
pid
,
opts
)
return
runCommandParent
(
r
,
pid
,
opts
,
cmsruntimeout
)
else
:
os
.
close
(
r
)
os
.
dup2
(
w
,
1
)
...
...
@@ -370,6 +378,11 @@ def main():
no_trial
=
0
while
(
no_trial
<
3
):
if
(
opts
.
endtime
-
time
.
time
())
<
60
:
log
.
error
(
"Timed out before reaching 3 attempts limit"
)
exitCode
=
8015
break
xrootd_file
=
"/store/test/xrootd/"
+
CSWNFB_SITES
[
rndm_site
]
+
CSWNFB_FILES
[
rndm_file
]
log
.
info
(
"Xrootd fullpath: %s"
%
xrootd_file
)
...
...
@@ -377,10 +390,11 @@ def main():
fd
.
write
(
cms_file
%
(
xrootd_file
,
level
))
fd
.
close
()
stdout
,
exitCode
=
runCommand
(
"cmsRun"
,
[
"test_xrootd.py"
],
opts
,
combineStd
=
True
)
stdout
,
exitCode
=
runCommand
(
"cmsRun"
,
[
"test_xrootd.py"
],
opts
,
150
,
combineStd
=
True
)
no_trial
+=
1
for
line
in
stdout
.
split
(
'
\n
'
):
if
re
.
search
(
'opened'
,
line
)
or
re
.
search
(
'redirect'
,
line
):
if
re
.
search
(
'opened'
,
line
)
or
re
.
search
(
'redirect'
,
line
)
or
re
.
search
(
'Reading'
,
line
)
or
re
.
search
(
'server'
,
line
):
print
line
maxlen
=
12
*
1024
...
...
@@ -393,18 +407,25 @@ def main():
if
(
exitCode
==
0
):
break
log
.
error
(
"Failed cmsRun. Output:"
)
if
(
opts
.
endtime
-
time
.
time
())
<
60
:
log
.
error
(
"Not enough time left for another try"
)
break
log
.
error
(
"Failed cmsRun output:"
)
print
stdout
rndm_site
=
(
rndm_site
+
1
)
%
len
(
CSWNFB_SITES
)
if
(
CSWNFB_SITES
[
rndm_site
]
==
siteName
):
rndm_site
=
(
rndm_site
+
1
)
%
len
(
CSWNFB_SITES
)
log
.
info
(
"
r
etrying with fallback site: %s"
%
CSWNFB_SITES
[
rndm_site
])
log
.
info
(
"
R
etrying with fallback site: %s"
%
CSWNFB_SITES
[
rndm_site
])
if
exitCode
:
returnCode
=
NAG_CRITICAL
if
numCatalogs
>
1
:
# Fallback correctly configured, so only WARN
if
exitCode
==
8015
:
returnCode
=
NAG_WARNING
return
print_summary
(
"Test reached timeout before the third attempt; exit code %s"
%
exitCode
,
returnCode
)
returnCode
=
NAG_CRITICAL
return
print_summary
(
"Failed cmsRun; exit code %d"
%
exitCode
,
returnCode
)
log
.
info
(
"Successful cmsRun."
)
# Return the correct exit code.
...
...
nagios/grid-monitoring-probes-org.cms-etf.spec
View file @
7bfde71f
...
...
@@ -4,7 +4,7 @@
Summary: WLCG Compliant Probes from %{site}
Name: nagios-plugins-wlcg-org.cms
Version: 1.1.5
4
Version: 1.1.5
5
Release: 1%{?dist}
License: GPL
...
...
@@ -52,6 +52,8 @@ install --directory %{buildroot}/etc/cron.d
/etc/cron.d/cms_glexec
%changelog
* Fri Feb 1 2019 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.55-1.
- Made xrootd fallback test critical
* Thu Dec 6 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.54-1.
- fixes in xrootd-fallback and moved CMSSW to CMSSW_9_2_6 in tests
* Tue Nov 6 2018 Andrea Sciaba <Andrea.Sciaba@cern.ch> 1.1.53-1.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment