Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
etf
cmssam
Commits
2290ae64
Commit
2290ae64
authored
Dec 11, 2018
by
Marian Babik
Browse files
added abrt; fixed xroot and srm tests
parent
f4af54b3
Changes
5
Hide whitespace changes
Inline
Side-by-side
Dockerfile
View file @
2290ae64
...
...
@@ -18,8 +18,8 @@ RUN yum -y install voms globus-gsi-sysconfig globus-gsi-cert-utils globus-gssapi
# Condor client
RUN
yum
-y
install
condor condor-python
# SRM
RUN
yum
-y
install
lcg-util gfal2-util lcg-util
-python gfal
-python
globus-ftp-client
\
# SRM
todo: test removing globus deps
RUN
yum
-y
install
gfal2-all gfal2
-python gfal
2-util
globus-ftp-client
\
globus-gass-transfer globus-ftp-control globus-xio globus-gssapi-error
\
globus-gsi-sysconfig globus-gsi-openssl-error globus-openssl-module
\
globus-gsi-proxy-ssl
...
...
@@ -32,6 +32,10 @@ RUN yum -y install xrootd-python-4.7.1-1.osg34.el7 xrootd-client-4.7.1-1.osg34.e
COPY
docker/etf-cms/config/grid-env.sh /etc/profile.d/
RUN
echo
"source /etc/profile.d/grid-env.sh"
>>
/opt/omd/sites/
$CHECK_MK_SITE
/.profile
# ABRTD
RUN
yum
-y
install
abrt
COPY
docker/etf-cms/config/mailx_event.conf /etc/
# VOMS config
# RUN mkdir -p /etc/vomses/
# COPY ./config/cms-lcg-voms2.cern.ch /etc/vomses/
...
...
SiteTests/SE/cmssam_xrootd_endpnt.py
View file @
2290ae64
#!/usr/bin/python
#!/usr/bin/
env
python
# ########################################################################### #
#
# SAM xrootd file access probe of CMS
...
...
SiteTests/SE/srmvometrics.py
View file @
2290ae64
...
...
@@ -45,7 +45,7 @@ SAM (Service Availability Monitoring)
import
os
import
sys
import
getopt
import
time
#@UnresolvedImport
import
time
import
commands
import
errno
import
re
...
...
@@ -56,6 +56,7 @@ import pickle
import
datetime
import
urlparse
import
filecmp
import
requests
try
:
from
gridmon
import
probe
...
...
@@ -63,7 +64,7 @@ try:
from
gridmon
import
gridutils
from
gridmon.process
import
signaling
import
gfal2
except
ImportError
,
e
:
except
ImportError
as
e
:
summary
=
"UNKNOWN: Error loading modules : %s"
%
(
e
)
sys
.
stdout
.
write
(
summary
+
'
\n
'
)
sys
.
stdout
.
write
(
summary
+
'
\n
sys.path: %s
\n
'
%
str
(
sys
.
path
))
...
...
@@ -79,6 +80,7 @@ LCG_UTIL_TIMEOUT_SRM = 180
gfal2
.
set_verbose
(
gfal2
.
verbose_level
.
debug
)
class
SRMVOMetrics
(
probe
.
MetricGatherer
)
:
"""A Metric Gatherer specific for SRM."""
...
...
@@ -171,9 +173,7 @@ class SRMVOMetrics(probe.MetricGatherer) :
},
}
def
__init__
(
self
,
tuples
):
probe
.
MetricGatherer
.
__init__
(
self
,
tuples
,
'SRM'
)
self
.
usage
=
""" Metrics specific options:
...
...
@@ -224,9 +224,9 @@ class SRMVOMetrics(probe.MetricGatherer) :
curhour
=
datetime
.
datetime
.
now
().
hour
self
.
_fileHistoryVoInfoDictionary
=
self
.
workdir_metric
+
"/VOInfoDictionary_%s"
%
curhour
self
.
_fileVoInfoDictionary
=
self
.
workdir_metric
+
"/VOInfoDictionary"
#Read dictionary from current cache
#
Read dictionary from current cache
try
:
#Clean up stale current cache entries (older than 3 days)
#
Clean up stale current cache entries (older than 3 days)
try
:
modtime
=
os
.
path
.
getmtime
(
self
.
_fileVoInfoDictionary
)
if
(
time
.
time
()
-
modtime
>
3
*
86400
):
...
...
@@ -299,7 +299,6 @@ class SRMVOMetrics(probe.MetricGatherer) :
"""
try
:
os
.
unlink
(
self
.
_fileLock
)
except
OSError
:
pass
def
saveVoInfoDictionary
(
self
,
filename
):
fp
=
open
(
filename
,
"w"
)
...
...
@@ -313,29 +312,29 @@ class SRMVOMetrics(probe.MetricGatherer) :
return
voInfoDict
def
weightEndpointCriticality
(
self
,
VOtest
):
DetailedMsg
=
''
CriticalResult
=
[]
DetailedMsg
=
''
CriticalResult
=
[]
for
srmendpt
in
self
.
_voInfoDictionary
.
keys
():
try
:
try
:
criticality
=
self
.
_voInfoDictionary
[
srmendpt
][
'criticality'
]
try
:
criticality
=
self
.
_voInfoDictionary
[
srmendpt
][
'criticality'
]
except
KeyError
:
criticality
=
1
if
criticality
==
1
:
CriticalResult
.
append
(
self
.
_voInfoDictionary
[
srmendpt
][
VOtest
][
0
])
# DetailedMsg = DetailedMsg + str(self._voInfoDictionary[srmendpt])
DetailedMsg
=
DetailedMsg
+
\
str
(
self
.
_voInfoDictionary
[
srmendpt
][
'space_token'
])
+
\
" critical= "
+
str
(
criticality
)
+
\
" "
+
str
(
self
.
_voInfoDictionary
[
srmendpt
][
VOtest
][
1
])
+
\
" file= "
+
str
(
self
.
_voInfoDictionary
[
srmendpt
][
'fn'
])
+
\
"
\n
"
# self.printd('VO specific Detailed Output: %s' % str(DetailedMsg))
except
IndexError
:
return
'UNKNOWN'
,
'No SRM endpoints found in internal dictionary'
except
KeyError
:
criticality
=
1
if
criticality
==
1
:
CriticalResult
.
append
(
self
.
_voInfoDictionary
[
srmendpt
][
VOtest
][
0
])
#DetailedMsg = DetailedMsg + str(self._voInfoDictionary[srmendpt])
DetailedMsg
=
DetailedMsg
+
\
str
(
self
.
_voInfoDictionary
[
srmendpt
][
'space_token'
])
+
\
" critical= "
+
str
(
criticality
)
+
\
" "
+
str
(
self
.
_voInfoDictionary
[
srmendpt
][
VOtest
][
1
])
+
\
" file= "
+
str
(
self
.
_voInfoDictionary
[
srmendpt
][
'fn'
])
+
\
"
\n
"
#self.printd('VO specific Detailed Output: %s' % str(DetailedMsg))
except
IndexError
:
return
(
'UNKNOWN'
,
'No SRM endpoints found in internal dictionary'
)
except
KeyError
:
return
(
'UNKNOWN'
,
'No test results found in internal dictionary for SRM endpoint'
)
#print " GLOBAL result \n \n \n \n \n "
return
'UNKNOWN'
,
'No test results found in internal dictionary for SRM endpoint'
# print " GLOBAL result \n \n \n \n \n "
## oredering criticality
self
.
printd
(
'VO specific Detailed Output: %s'
%
str
(
DetailedMsg
))
if
'CRITICAL'
in
CriticalResult
:
# it's enough one CRIT
...
...
@@ -359,18 +358,17 @@ class SRMVOMetrics(probe.MetricGatherer) :
def
metricAllLHCb
(
self
):
return
self
.
metricAll
(
'AllLHCb'
)
def
metricGetPFNFromTFC
(
self
,
testLFN
=
"/store/unmerged/SAM/testSRM"
):
def
metricGetPFNFromTFC
(
self
,
testLFN
=
"/store/unmerged/SAM/testSRM"
):
"""Get full SRM endpoint(s) and storage areas from PhEDEx DataService.
"""
try
:
self
.
__workdir_lock
()
except
IOError
,
e
:
except
IOError
as
e
:
self
.
printd
(
'Failed to lock. %s'
%
str
(
e
))
return
'UNKNOWN'
,
'UNKNOWN: Failed to lock working directory.'
#URLs for PhEDEx DataService for lfn2pfn
# URLs for PhEDEx DataService for lfn2pfn
tfcURL
=
"https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn?node="
pfnMatchURL
=
"&lfn="
pfnProtocolOption
=
"&protocol=srmv2"
...
...
@@ -384,6 +382,7 @@ class SRMVOMetrics(probe.MetricGatherer) :
opener
=
urllib2
.
build_opener
()
header
=
'grid-monitoring-probes-org.cms.SRM-GetPFNFromTFC/1.0 (CMS) %s/%s %s/%s (%s)'
%
(
urllib2
.
__name__
,
urllib2
.
__version__
,
platform
.
system
(),
platform
.
release
(),
platform
.
processor
())
opener
.
addheaders
=
[(
'User-agent'
,
header
)]
headers
=
{
'user-agent'
:
header
}
# LFN path for file to test transfers
self
.
printd
(
'The LFN used for testing will be in: '
+
testLFN
)
...
...
@@ -391,16 +390,16 @@ class SRMVOMetrics(probe.MetricGatherer) :
try
:
self
.
printd
(
"Contacting PhEDEx dataservice to perform SEName-to-PhEDExNodeName at URL:"
)
self
.
printd
(
seNamesURL
%
nodeName
)
seNames
=
opener
.
open
(
seNamesURL
%
nodeName
)
seNamesJSON
=
simplejson
.
load
(
seNames
)
req
=
requests
.
get
(
seNamesURL
%
nodeName
,
headers
=
headers
,
verify
=
False
,
timeout
=
120
)
req
.
raise_for_status
()
seNames
=
req
.
content
seNamesJSON
=
simplejson
.
loads
(
seNames
)
phedexNodeNames
=
seNamesJSON
[
u
'phedex'
][
u
'senames'
]
except
(
urllib2
.
URLError
,
KeyError
):
except
(
requests
.
HTTPError
,
KeyError
):
self
.
printd
(
'WARNING: Unable to open PhEDEx DataService senames API to perform SEName-to-PhEDExNodeName matching for SEName %s'
%
nodeName
)
if
len
(
self
.
_voInfoDictionary
):
self
.
printd
(
"WARNING: using cached PFN"
)
# Update timestamp/uuid in cached PFN
# Update timestamp/uuid in cached PFN
for
pfn
in
self
.
_voInfoDictionary
:
try
:
self
.
_voInfoDictionary
[
pfn
][
'fn'
]
=
self
.
_voInfoDictionary
[
pfn
][
'fntemp'
]
%
(
str
(
int
(
time
.
time
())),
samutils
.
uuidstr
())
...
...
@@ -408,13 +407,11 @@ class SRMVOMetrics(probe.MetricGatherer) :
self
.
printd
(
pfn
+
" : "
+
str
(
self
.
_voInfoDictionary
[
pfn
]))
except
KeyError
:
self
.
printd
(
"WARNING: no cached PFN found"
)
return
(
'WARNING'
,
"WARNING: Unable to open PhEDEx DataService senames API, no cached PFN found"
)
return
(
'OK'
,
"WARNING: Unable to open PhEDEx DataService senames API, using cached PFN"
)
return
'WARNING'
,
"WARNING: Unable to open PhEDEx DataService senames API, no cached PFN found"
return
'OK'
,
"WARNING: Unable to open PhEDEx DataService senames API, using cached PFN"
else
:
self
.
printd
(
"WARNING: no cached PFN found"
)
return
(
'WARNING'
,
"WARNING: Unable to open PhEDEx DataService senames API, no cached PFN found"
)
return
'WARNING'
,
"WARNING: Unable to open PhEDEx DataService senames API, no cached PFN found"
outputList
=
{}
...
...
@@ -441,15 +438,17 @@ class SRMVOMetrics(probe.MetricGatherer) :
self
.
printd
(
pfnUrl
)
try
:
pfnFile
=
opener
.
open
(
pfnUrl
)
pfnJSON
=
simplejson
.
load
(
pfnFile
)
req
=
requests
.
get
(
pfnUrl
,
headers
=
headers
,
verify
=
False
,
timeout
=
120
)
req
.
raise_for_status
()
pfnFile
=
req
.
content
pfnJSON
=
simplejson
.
loads
(
pfnFile
)
pfn
=
(((
pfnJSON
[
u
'phedex'
])[
u
'mapping'
])[
0
])[
u
'pfn'
]
spacetoken
=
(((
pfnJSON
[
u
'phedex'
])[
u
'mapping'
])[
0
])[
u
'space_token'
]
except
(
urllib2
.
URL
Error
,
KeyError
):
except
(
requests
.
HTTP
Error
,
KeyError
):
self
.
printd
(
'WARNING: Unable to open PhEDEx DataService lfn2pfn URL to perform LFN-to-PFN matching for Site %s'
%
siteName
)
continue
if
pfn
==
None
:
if
not
pfn
:
self
.
printd
(
"ERROR: LFN did not match to any PFN - probably the TFC does not contain any rule for the srmv2 protocol."
)
continue
...
...
@@ -463,15 +462,15 @@ class SRMVOMetrics(probe.MetricGatherer) :
if
re
.
compile
(
"^srm://.+srm/managerv2\?SFN=.+$"
).
match
(
pfn
)
or
re
.
compile
(
"^srm://.+srm/v2/server\?SFN=.+$"
).
match
(
pfn
):
pfntonode
=
re
.
sub
(
":.+$"
,
""
,
re
.
sub
(
"^srm://"
,
""
,
pfn
))
if
pfntonode
!=
nodeName
:
if
pfntonode
!=
nodeName
:
self
.
printd
(
"WARNING: the resulting PFN matches to SRM "
+
pfntonode
+
" instead of SRM "
+
nodeName
)
continue
else
:
fntemp
=
self
.
_fileSRMPattern
%
(
spacetokendesc
,
'%s'
,
'%s'
)
fntemp
=
self
.
_fileSRMPattern
%
(
spacetokendesc
,
'%s'
,
'%s'
)
fn
=
fntemp
%
(
str
(
int
(
time
.
time
())),
samutils
.
uuidstr
())
outputList
[
pfn
]
=
{
'fntemp'
:
fntemp
,
'fn'
:
fn
,
'space_token'
:
spacetoken
,
'space_token_get'
:
spacetoken
,
'userspace'
:
testLFN
}
outputList
[
pfn
]
=
{
'fntemp'
:
fntemp
,
'fn'
:
fn
,
'space_token'
:
spacetoken
,
'space_token_get'
:
spacetoken
,
'userspace'
:
testLFN
}
elif
pfn
.
startswith
(
"gsiftp://"
):
pfntonode
=
urlparse
.
urlparse
(
pfn
).
hostname
pfntonode
=
urlparse
.
urlparse
(
pfn
).
hostname
fntemp
=
self
.
_fileSRMPattern
%
(
spacetokendesc
,
'%s'
,
'%s'
)
fn
=
fntemp
%
(
str
(
int
(
time
.
time
())),
samutils
.
uuidstr
())
outputList
[
pfn
]
=
{
'fntemp'
:
fntemp
,
'fn'
:
fn
,
'space_token'
:
spacetoken
,
'space_token_get'
:
spacetoken
,
'userspace'
:
testLFN
}
...
...
@@ -486,10 +485,12 @@ class SRMVOMetrics(probe.MetricGatherer) :
# Extract a random PFN from the dictionary of PFN matches. It will be used for testing, other PFN matches will be ignored
# Print warning if not all PFN matches are the same.
if
len
(
outputList
)
==
0
:
self
.
printd
(
"WARNING: "
+
nodeName
+
" not found in SRM list"
)
self
.
printd
(
"WARNING: This error usually means that the site is not running PhEDEx agents in the Prod instance,"
)
self
.
printd
(
"WARNING: or that the TrivialFileCatalog published by the site's PhEDEx agents doesn't have a valid srmv2 protocol rule for "
+
nodeName
)
if
len
(
outputList
)
==
0
:
self
.
printd
(
"WARNING: "
+
nodeName
+
" not found in SRM list"
)
self
.
printd
(
"WARNING: This error usually means that the site is not running PhEDEx agents in the Prod instance,"
)
self
.
printd
(
"WARNING: or that the TrivialFileCatalog published by the site's PhEDEx agents doesn't have a valid srmv2 protocol rule for "
+
nodeName
)
if
len
(
self
.
_voInfoDictionary
):
self
.
printd
(
"WARNING: using cached PFN"
)
# Update timestamp/uuid in cached PFN
...
...
@@ -497,20 +498,19 @@ class SRMVOMetrics(probe.MetricGatherer) :
try
:
self
.
_voInfoDictionary
[
pfn
][
'fn'
]
=
self
.
_voInfoDictionary
[
pfn
][
'fntemp'
]
%
(
str
(
int
(
time
.
time
())),
samutils
.
uuidstr
())
self
.
printd
(
"The PFN path used for testing will be:"
)
self
.
printd
(
pfn
+
" : "
+
str
(
self
.
_voInfoDictionary
[
pfn
]))
self
.
printd
(
pfn
+
" : "
+
str
(
self
.
_voInfoDictionary
[
pfn
]))
except
KeyError
:
self
.
printd
(
"WARNING: no cached PFN found"
)
return
(
'WARNING'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, no cached PFN found"
)
return
(
'OK'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, using cached PFN"
)
return
(
'WARNING'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, no cached PFN found"
)
return
(
'OK'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, using cached PFN"
)
else
:
self
.
printd
(
"WARNING: no cached PFN found"
)
return
(
'WARNING'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, no cached PFN found"
)
return
(
'WARNING'
,
"WARNING: "
+
nodeName
+
" not found in SRM list, no cached PFN found"
)
else
:
self
.
_voInfoDictionary
=
outputList
else
:
self
.
_voInfoDictionary
=
outputList
for
outputPfns
in
outputList
:
self
.
printd
(
"The PFN path used for testing will be:"
)
self
.
printd
(
outputPfns
+
" : "
+
str
(
outputList
[
outputPfns
]))
...
...
@@ -557,16 +557,17 @@ class SRMVOMetrics(probe.MetricGatherer) :
self
.
printd
(
str
(
agis_endpoint_info
))
self
.
printd
(
str
(
self
.
_voInfoDictionary
))
try
:
fp
=
open
(
self
.
_ldap_fileEndptSAPath
,
"w"
)
for
info
in
agis_endpoint_info
:
ep
=
info
.
split
()[
0
]
+
'
\n
'
fp
.
write
(
ep
)
fp
.
close
()
except
IOError
,
e
:
try
:
os
.
unlink
(
self
.
_ldap_fileEndptSAPath
)
except
OSError
:
pass
return
(
'UNKNOWN'
,
'IOError: %s'
%
str
(
e
))
fp
=
open
(
self
.
_ldap_fileEndptSAPath
,
"w"
)
for
info
in
agis_endpoint_info
:
ep
=
info
.
split
()[
0
]
+
'
\n
'
fp
.
write
(
ep
)
fp
.
close
()
except
IOError
as
e
:
try
:
os
.
unlink
(
self
.
_ldap_fileEndptSAPath
)
except
OSError
:
pass
return
(
'UNKNOWN'
,
'IOError: %s'
%
str
(
e
))
#print self._ldap_fileEndptSAPath
...
...
docker/etf-cms/config/mailx_event.conf
0 → 100644
View file @
2290ae64
EVENT
=
notify
# do not rely on the default config nor on the config file
Mailx_Subject
=
"[abrt] $(cat package || cat executable): $(cat crash_function && echo "
():
") $(cat reason || (cat executable && echo "
crashed
"))"
\
Mailx_EmailFrom
=
"ABRT Daemon <DoNotReply>"
\
Mailx_EmailTo
=
"root@localhost"
\
reporter
-
mailx
--
notify
-
only
EVENT
=
notify
-
dup
# do not rely on the default config nor on the config file
Mailx_Subject
=
"[abrt] $(cat package || cat executable): $(cat crash_function && echo "
():
") $(cat reason || (cat executable && echo "
crashed
"))"
\
Mailx_EmailFrom
=
"ABRT Daemon <DoNotReply>"
\
Mailx_EmailTo
=
"root@localhost"
\
reporter
-
mailx
--
notify
-
only
docker/etf-cms/docker-entrypoint.sh
View file @
2290ae64
...
...
@@ -92,6 +92,13 @@ else
/usr/bin/disable_nstream
fi
if
[
"
${
ABRT_ENABLED
}
"
-eq
"1"
]
;
then
echo
"Enabling abrt ..."
sed
-e
"s/OpenGPGCheck = yes/OpenGPGCheck = no/g"
-i
/etc/abrt/abrt-action-save-package-data.conf
cp
-f
/etc/mailx_event.conf /etc/libreport/events.d/mailx_event.conf
/usr/sbin/abrtd
fi
echo
"Fetching CMS credentials ..."
su etf
-c
"/usr/lib/nagios/plugins/globus/refresh_proxy --vo-fqan /cms/Role=lcgadmin --myproxyuser nagios -H myproxy.cern.ch -t 120 --key /opt/omd/sites/etf/etc/nagios/globus/etf_srv_key.pem --vo cms --lifetime 24 --name NagiosRetrieve-ETF-cms -x /opt/omd/sites/etf/etc/nagios/globus/userproxy.pem--cms-Role_lcgadmin --cert /opt/omd/sites/etf/etc/nagios/globus/etf_srv_cert.pem"
su etf
-c
"/usr/lib/nagios/plugins/globus/refresh_proxy --vo-fqan /cms/Role=production --myproxyuser nagios -H myproxy.cern.ch -t 120 --key /opt/omd/sites/etf/etc/nagios/globus/etf_srv_key.pem --vo cms --lifetime 24 --name NagiosRetrieve-ETF-cms -x /opt/omd/sites/etf/etc/nagios/globus/userproxy.pem--cms-Role_production --cert /opt/omd/sites/etf/etc/nagios/globus/etf_srv_cert.pem"
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment