Commit 25734dc8 authored by Marian Babik
Browse files

ETF HEPCloud init

parent fa77e609
......@@ -7,24 +7,35 @@ LABEL version="1.0"
ENV NSTREAM_ENABLED=0

# OSG Middleware
# Every yum step cleans its cache in the same layer, so package metadata and
# downloaded RPMs are not baked into the image.
RUN yum -y install yum-priorities \
    && yum clean all
RUN rpm -Uvh https://repo.opensciencegrid.org/osg/3.4/osg-3.4-el7-release-latest.rpm
# Inserts "priority=99" as line 7 of the EPEL repo file.
# NOTE(review): presumably so the OSG repositories win over EPEL - confirm.
RUN sed "7i priority=99" -i /etc/yum.repos.d/epel.repo

# Core packages
RUN yum -y install voms voms-clients-cpp globus-gsi-sysconfig globus-gsi-cert-utils globus-gssapi-gsi globus-gss-assist \
    globus-gsi-proxy-core globus-gsi-credential globus-gsi-callback globus-gsi-openssl-error \
    globus-openssl-module globus-gsi-proxy-ssl globus-callout \
    && yum clean all

# Condor client
RUN yum -y install condor condor-python \
    && yum clean all

# Xroot
RUN yum -y install xrootd-python xrootd-client xrootd-libs xrootd-client-libs \
    && yum clean all

# SRM todo: test removing globus deps
RUN yum -y install gfal2-all gfal2-python gfal2-util globus-ftp-client \
    globus-gass-transfer globus-ftp-control globus-xio globus-gssapi-error \
    globus-gsi-sysconfig globus-gsi-openssl-error globus-openssl-module \
    globus-gsi-proxy-ssl \
    && yum clean all

# MW env
COPY ./docker/etf-cms-hepcloud/config/grid-env.sh /etc/profile.d/
# $CHECK_MK_SITE is not set in this section; it has to be defined earlier in the build.
RUN echo "source /etc/profile.d/grid-env.sh" >> /opt/omd/sites/$CHECK_MK_SITE/.profile

# VOMS config (disabled: the vomses and .lsc files are no longer copied into the image)
# RUN mkdir -p /etc/vomses/
# COPY ./docker/etf-cms-hepcloud/config/cms-lcg-voms2.cern.ch /etc/vomses/
# COPY ./docker/etf-cms-hepcloud/config/cms-voms2.cern.ch /etc/vomses/
#RUN mkdir -p /etc/grid-security/vomsdir/cms/
#COPY ./config/lcg-voms2.cern.ch.lsc /etc/grid-security/vomsdir/cms/
#COPY ./config/voms2.cern.ch.lsc /etc/grid-security/vomsdir/cms/
......@@ -33,15 +44,13 @@ COPY ./docker/etf-cms-hepcloud/config/cms-voms2.cern.ch /etc/vomses/
RUN yum -y install nagios-plugins-wlcg-condor ncgx-config-wlcg nagios-plugins-globus nagios-plugins \
    && yum clean all

# ETF WN payload (RPM install disabled)
# RUN yum -y install nagios-plugins-wlcg-org.cms

# ETF job submission setup
# check_js is made executable in the same layer that installs it.
RUN yum -y install python-jess python-nap \
    && yum clean all \
    && chmod 755 /usr/lib64/nagios/plugins/check_js
COPY ./docker/etf-cms-hepcloud/config/check_condor.cfg /etc/ncgx/metrics.d/
COPY ./docker/etf-cms-hepcloud/config/metrics.cfg /etc/ncgx/metrics.d/wlcg_cms.cfg

# ETF streaming
# NOTE(review): the spool directory is world-writable (777); tighten to the
# owning user/group if the writer identity is known.
RUN mkdir -p /var/spool/nstream/outgoing && chmod 777 /var/spool/nstream/outgoing
......@@ -50,6 +59,18 @@ COPY ./docker/etf-cms-hepcloud/config/ocsp_handler.cfg /etc/nstream/
# CMS config
# Check definitions go to the ncgx conf.d directory; the plugin goes to x_plugins/.
COPY ./docker/etf-cms-hepcloud/config/cms_checks.cfg /etc/ncgx/conf.d/
COPY ./docker/etf-cms-hepcloud/config/etf_plugin_cms_hepcloud.py /usr/lib/ncgx/x_plugins/
# CMS payload
# The probes are copied from the build context into the org.cms probe tree.
RUN mkdir -p /usr/libexec/grid-monitoring/probes/org.cms/wnjob
# Storage-element tests are placed directly under org.cms/.
COPY SiteTests/SE/* /usr/libexec/grid-monitoring/probes/org.cms/
# Per-role worker-node job files (lcgadmin, production) and the glexec file.
COPY nagios/config/org.cms.lcgadmin /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms.lcgadmin
COPY nagios/config/org.cms.production /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms.production
COPY nagios/org.cms.glexec /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms.glexec
# Worker-node test job: MonteCarlo content goes into testjob/, while the testjob
# and FroNtier tests are both copied into the same testjob/tests directory.
COPY SiteTests/MonteCarlo /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms/probes/org.cms/testjob/
COPY SiteTests/testjob/tests /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms/probes/org.cms/testjob/tests
COPY SiteTests/FroNtier/tests /usr/libexec/grid-monitoring/probes/org.cms/wnjob/org.cms/probes/org.cms/testjob/tests
# Installed as a cron.d entry named cms_glexec.
COPY nagios/config/cms_glexec-etf /etc/cron.d/cms_glexec
# ETF config
COPY ./docker/etf-cms-hepcloud/config/service_template.tpl /etc/ncgx/templates/
......
import logging
import itertools
import requests
import urlparse
import xml.etree.ElementTree as ET
from ncgx.inventory import Hosts, Checks, Groups
from vofeed.api import VOFeed
# Module logger, registered under the 'ncgx' name.
log = logging.getLogger('ncgx')

# Job-state metric(s); run() attaches each one per host with a --resource argument.
CE_STATE_METRICS = (
    'org.sam.CONDOR-JobState-/cms/Role=lcgadmin',
)

# Metrics applied to everything tagged XROOTD.
XROOT_METRICS = (
    'org.cms.SE-xrootd-contain',
    'org.cms.SE-xrootd-connection',
    'org.cms.SE-xrootd-version',
)

# Job submission metric followed by the worker-node metrics, all for the
# /cms/Role=lcgadmin FQAN.
CE_METRICS = (
    'org.sam.CONDOR-JobSubmit-/cms/Role=lcgadmin',
    'org.cms.WN-analysis-/cms/Role=lcgadmin',
    'org.cms.WN-isolation-/cms/Role=lcgadmin',
    'org.cms.WN-basic-/cms/Role=lcgadmin',
    'org.cms.WN-cvmfs-/cms/Role=lcgadmin',
    'org.cms.WN-env-/cms/Role=lcgadmin',
    'org.cms.WN-frontier-/cms/Role=lcgadmin',
    'org.cms.WN-mc-/cms/Role=lcgadmin',
    'org.cms.WN-remotestageout-/cms/Role=lcgadmin',
    'org.cms.WN-squid-/cms/Role=lcgadmin',
    'org.cms.WN-xrootd-access-/cms/Role=lcgadmin',
    'org.cms.WN-xrootd-fallback-/cms/Role=lcgadmin',
)
def run(url, ipv6=False):
    """Generate the ncgx host, check and host-group configuration from a VO feed.

    :param url: location of the VO feed, passed straight to ``VOFeed``.
    :param ipv6: accepted so callers may pass it; this function does not read
        it (the xrootd read check below is pinned to IPv4 through ``-4``).
    """
    log.info("Processing vo feed: %s" % url)

    # Get services from the VO feed, i.e
    # list of tuples (hostname, flavor, endpoint)
    feed = VOFeed(url)
    services = feed.get_services()

    # Add hosts, each tagged with corresponding flavors
    # creates /etc/ncgx/conf.d/generated_hosts.cfg
    h = Hosts()
    for service in services:
        h.add(service[0], tags=[service[1]])
    # Statically configured xrootd host, added once on top of the feed content.
    h.add('storm.mib.infn.it', tags=["XROOTD", ])
    h.serialize()

    # Add corresponding metrics to tags
    # creates /etc/ncgx/conf.d/generated_checks.cfg
    c = Checks()
    #c.add_all(CE_METRICS, tags=["HTCONDOR-CE"])

    # HT-CONDOR-CEs
    for service in services:  # special handling for HTCONDOR-CEs (no BDII)
        # Only HTCondor-CE hosts whose name contains 'fnal' get CE checks.
        if service[1] == 'HTCONDOR-CE' and 'fnal' in service[0]:
            for m in CE_STATE_METRICS:
                c.add(m, hosts=(service[0],), params={'args': {'--resource': 'htcondor://%s' % service[0]}})
            c.add_all(CE_METRICS, hosts=(service[0],))

    c.add_all(XROOT_METRICS, tags=["XROOTD"])
    c.add("org.cms.SE-xrootd-read", hosts=('storm.mib.infn.it',),
          params={'args': {'--site': 'T3_IT_MIB',
                           '--endpoint': 'storm.mib.infn.it:1094',
                           '-4': ''}, '_tags': 'XROOTD'})
    c.serialize()

    # Add host groups
    sites = feed.get_groups("CMS_Site")
    hg = Groups("host_groups")
    # items() instead of iteritems(): same pairs, and not tied to Python 2 dicts.
    for site, hosts in sites.items():
        for host in hosts:
            hg.add(site, host)
    hg.serialize()
......@@ -2,8 +2,13 @@
set -e

# Signal handler: stops crond (if its pid file exists), removes the wlcg nagios
# conf.d directory, then stops OMD and Apache.
_term() {
    if [[ -f /var/run/crond.pid ]]; then
        # A stale pid file must not abort the rest of the cleanup under `set -e`.
        kill -9 "$(cat /var/run/crond.pid)" || true
        rm -f /var/run/crond.pid
    fi
    rm -rf /opt/omd/sites/etf/etc/nagios/conf.d/wlcg/
    omd stop
    /usr/sbin/httpd -k stop
}

trap _term SIGINT SIGTERM
......@@ -27,23 +32,30 @@ echo "${plugins}"
echo ""
echo "Starting xinetd ..."
export XINETD_LANG="en_US" && /usr/sbin/xinetd -stayalive -pidfile /var/run/xinetd.pid

# Optionally remap the site user's uid and re-own the directories it writes to.
if [[ -n ${CHECK_MK_USER_ID} ]] ; then
    echo "Changing $CHECK_MK_SITE uid to $CHECK_MK_USER_ID"
    /usr/sbin/usermod -u "${CHECK_MK_USER_ID}" "${CHECK_MK_SITE}"
    chown -R "${CHECK_MK_SITE}" /etc/ncgx /var/cache/ncgx /var/cache/nap
    chown -R "${CHECK_MK_SITE}" /usr/libexec/grid-monitoring/probes/
fi

# Optionally create group "sec" with the requested gid and add the site user to it.
if [[ -n ${CHECK_MK_GROUP_ID} ]] ; then
    echo "Creating group with gid $CHECK_MK_GROUP_ID"
    /usr/sbin/groupadd -g "${CHECK_MK_GROUP_ID}" sec
    /usr/sbin/groupmems -g sec -a "${CHECK_MK_SITE}"
fi

echo "Initialising ..."
# Remove any wlcg nagios configuration that is already present.
if [[ -d /opt/omd/sites/etf/etc/nagios/conf.d/wlcg/ ]]; then
    rm -rf /opt/omd/sites/etf/etc/nagios/conf.d/wlcg/
fi
/usr/bin/omd stop

echo "Starting crond ..."
/usr/sbin/crond -m off -p -s

echo "Copying certificates ..."
if [[ ! -f /etc/grid-security/hostcert.pem ]]; then
    echo "Failed to find certificates in /etc/grid-security"
    # A bare `exit` would return the status of the echo above (0) and make the
    # failed start look like a clean shutdown.
    exit 1
fi
......@@ -55,38 +67,39 @@ chown ${CHECK_MK_SITE} /usr/lib64/nagios/plugins/check_js
echo "Configuring access ..."
echo "Configured admins: $CHECK_MK_ADMINS"
sed -i "s|admin_users.*|admin_users = [$CHECK_MK_ADMINS]|" /opt/omd/sites/${CHECK_MK_SITE}/etc/check_mk/multisite.mk

# Optional WATO files supplied from outside the image.
if [[ -f /etc/check_mk/contacts.mk ]]; then
    cp /etc/check_mk/contacts.mk /opt/omd/sites/${CHECK_MK_SITE}/etc/check_mk/conf.d/wato/
fi
# -f (regular file), matching the contacts.mk test above; the previous -g only
# succeeds for set-group-id files, so users.mk was never copied.
if [[ -f /etc/check_mk/users.mk ]]; then
    cp /etc/check_mk/users.mk /opt/omd/sites/${CHECK_MK_SITE}/etc/check_mk/conf.d/wato/
fi

# handlers.cfg is only present in conf.d while `omd start` runs.
cp /etc/ncgx/templates/generic/handlers.cfg /opt/omd/sites/etf/etc/nagios/conf.d/
omd start
rm -f /opt/omd/sites/etf/etc/nagios/conf.d/handlers.cfg

echo "Configuring main.mk: $ETF_HOSTED_BY"
if [[ -z "${ETF_HOSTED_BY}" ]]; then
    echo " Variable ETF_HOSTED_BY is not defined, not touching main.mk"
else
    # Append only when the value is not already in the file, so repeated starts
    # do not accumulate duplicate entries.
    grep -qF "${ETF_HOSTED_BY}" /opt/omd/sites/${CHECK_MK_SITE}/etc/check_mk/main.mk || echo "all_hosts += [ \"${ETF_HOSTED_BY}\" ]" >> /opt/omd/sites/${CHECK_MK_SITE}/etc/check_mk/main.mk
fi

echo "Configuring ETF ..."
if [[ -z "${ETF_NAGIOS_HOST}" ]]; then
    echo " Variable ETF_NAGIOS_HOST is not defined, using hostname"
    ETF_NAGIOS_HOST=$(hostname)
fi
# Same append-if-missing step for both the supplied and the defaulted host name.
grep -qF "${ETF_NAGIOS_HOST}" /etc/ncgx/ncgx.cfg || echo "NAGIOS_HOST = \"${ETF_NAGIOS_HOST}\"" >> /etc/ncgx/ncgx.cfg

# NOTE(review): the pipeline's status is tee's, so a failing ncgx run no longer
# stops the script under `set -e` - confirm this is intended.
su etf -c "ncgx --log | tee /opt/omd/sites/etf/var/log/ncgx.log"
su - etf -c "cmk -II; cmk -O"
if [ "${NSTREAM_ENABLED}" -eq "1" ] ; then
if [[ "${NSTREAM_ENABLED}" -eq "1" ]] ; then
echo "Nagios stream enabled ..."
else
echo "Nagios stream disabled ..."
......@@ -110,5 +123,6 @@ echo "Reloading crontab ..."
su etf -c "omd reload crontab"
echo "Starting Apache ..."
# httpd runs as a background job and the script blocks in `wait`: bash defers
# trap handlers while a foreground command runs, but `wait` returns as soon as
# a trapped signal arrives, so the SIGINT/SIGTERM handler can run promptly.
/usr/sbin/httpd -DFOREGROUND &
wait $!
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment