etf_plugin_cms.py 7.35 KB
Newer Older
Andrea Sciaba's avatar
Andrea Sciaba committed
1
import logging
2
import json
Andrea Sciaba's avatar
Andrea Sciaba committed
3

Andrea Sciaba's avatar
Andrea Sciaba committed
4
from ncgx.inventory import Hosts, Checks, Groups
Andrea Sciaba's avatar
Andrea Sciaba committed
5
6
7
8
from vofeed.api import VOFeed

# Module-level logger; bound to the fixed 'ncgx' logger name rather than
# this module's own name.
log = logging.getLogger('ncgx')

Marian Babik's avatar
Marian Babik committed
9
10
# Status names and their numeric codes; run() writes each pair to the
# worker-node environment file as a SAME_<NAME>=<code> line.
SAME_CODES = {
    'OK': 10,
    'INFO': 20,
    'NOTICE': 30,
    'WARNING': 40,
    'ERROR': 50,
    'CRITICAL': 60,
    'MAINTENANCE': 100,
}

11
12
13
14
15
16
# Service flavour -> scheme prefix used when building the '--resource'
# string (<scheme>://<host>/...) for the job-state checks.
FLAVOR_MAP = {
    'CREAM-CE': 'cream',
    'ARC-CE': 'arc',
    'HTCONDOR-CE': 'condor',
    'GLOBUS': 'gt',
    'OSG-CE': 'gt',
}

Andrea Sciaba's avatar
Andrea Sciaba committed
17
# Job-state metrics; run() adds each of them per CE host with a
# host-specific '--resource' argument.
CE_STATE_METRICS = [
    'org.sam.CONDOR-JobState-/cms/Role=lcgadmin']
Andrea Sciaba's avatar
Andrea Sciaba committed
19
20

# Metrics that run() attaches to the "ARC-CE" and "HTCONDOR-CE" tags.
CE_METRICS = (
    'org.sam.CONDOR-JobSubmit-/cms/Role=lcgadmin',)
Andrea Sciaba's avatar
Andrea Sciaba committed
22
23

# Metrics that run() attaches to the "SRM" tag.
SE_METRICS = (
    'org.cms.SRM-AllCMS-/cms/Role=production',
    'org.cms.SRM-GetPFNFromTFC-/cms/Role=production',
    'org.cms.SRM-VODel-/cms/Role=production',
    'org.cms.SRM-VOGet-/cms/Role=production',
    'org.cms.SRM-VOGetTURLs-/cms/Role=production',
    'org.cms.SRM-VOLs-/cms/Role=production',
    'org.cms.SRM-VOLsDir-/cms/Role=production',
    'org.cms.SRM-VOPut-/cms/Role=production')
Andrea Sciaba's avatar
Andrea Sciaba committed
32

33
# Metrics that run() attaches to the "XROOTD" tag.
XROOT_METRICS = (
    'org.cms.SE-xrootd-contain',
    'org.cms.SE-xrootd-connection',
    'org.cms.SE-xrootd-version',
)
Andrea Sciaba's avatar
Andrea Sciaba committed
38

39
# Worker-node test script name -> metric name.  run() lists the keys (prefixed
# with the tests directory) in ETF_TESTS, registers the values as checks, and
# dumps the whole mapping as 'wn_metric_map' in the WN JSON config.
WN_METRICS = {
    'WN-cvmfs': 'org.cms.WN-cvmfs-/cms/Role=lcgadmin',
    'CE-cms-analysis.sing': 'org.cms.WN-analysis-/cms/Role=lcgadmin',
    'CE-cms-singularity': 'org.cms.WN-isolation-/cms/Role=lcgadmin',
    'CE-cms-basic.sing': 'org.cms.WN-basic-/cms/Role=lcgadmin',
    'CE-cms-env': 'org.cms.WN-env-/cms/Role=lcgadmin',
    'CE-cms-frontier.sing': 'org.cms.WN-frontier-/cms/Role=lcgadmin',
    'CE-cms-mc.sing': 'org.cms.WN-mc-/cms/Role=lcgadmin',
    'CE-cms-squid.sing': 'org.cms.WN-squid-/cms/Role=lcgadmin',
    'CE-cms-xrootd-access.sing': 'org.cms.WN-xrootd-access-/cms/Role=lcgadmin',
    'CE-cms-xrootd-fallback.sing': 'org.cms.WN-xrootd-fallback-/cms/Role=lcgadmin'
}

Marian Babik's avatar
Marian Babik committed
52
53
# Legacy SFT/SAME metrics, which are not Nagios compliant: every WN_METRICS
# key whose name contains 'CE'.
WN_METRICS_LEGACY = [name for name in WN_METRICS if 'CE' in name]
Marian Babik's avatar
Marian Babik committed
54

55
56

# ARC-CE hosts that get the native ARC client job-state metric instead of the
# generic CE_STATE_METRICS.
_ARC_NATIVE_CLIENT_HOSTS = ('ce0004.m45.ihep.su',)


def _add_xrootd_read_checks(checks, host_groups, services, ipv6):
    """Add one org.cms.SE-xrootd-read check per XROOTD service."""
    # The check is identical for both IP versions apart from this flag.
    ip_flag = '-6' if ipv6 else '-4'
    for service in services:
        if service[1] != "XROOTD":
            continue
        host = service[0]
        endpoint = service[2]
        site = host_groups.exact_match(host)
        if not site:
            log.warning("Unable to find site for host %s, skipping", host)
            continue
        if len(site) > 1:
            log.warning("Host assigned to multiple sites: %s, %s picking first one", host, site)
        checks.add("org.cms.SE-xrootd-read", hosts=(host,),
                   params={'args': {'--site': site.pop(),
                                    '--endpoint': endpoint,
                                    ip_flag: ''},
                           '_tags': 'XROOTD'})


def _write_worker_node_config():
    """Write the worker-node environment file and the WN metric-map JSON."""
    tests_dir = 'etf/probes/org.cms/testjob/tests/'

    # ETF env - environment variables to export on the worker node (global for all sites), such as:
    # ETF_TESTS - points to a list of WN tests to execute (stored in WN_METRICS)
    # ETF_LEGACY should be a subset of ETF_TESTS that identifies SFT tests (those that are not nagios compliant)
    # SAME* environment needed by the legacy/SFT tests
    with open('/tmp/etf-env.sh', 'w') as etf_env:
        etf_env.write('ETF_TESTS={}\n'.format(
            ','.join([tests_dir + m for m in WN_METRICS.keys()])))
        etf_env.write('ETF_LEGACY={}\n'.format(
            ','.join([tests_dir + m for m in WN_METRICS_LEGACY])))
        for code, value in SAME_CODES.items():
            etf_env.write('SAME_{}={}\n'.format(code, value))
        etf_env.write('SAME_VO=cms\n')
        etf_env.write('SAME_TEST_DIRNAME=$ETFROOT/probes/org.cms/testjob/tests\n')
        etf_env.write('SAME_SENSOR_HOME=$ETFROOT/probes/org.cms/testjob\n')

    # ETF WN-qFM config - maps WN scripts back to metrics
    # (WN-cvmfs -> org.cms.WN-cvmfs-/cms/Role=lcgadmin)
    with open('/tmp/etf_wnfm.json', 'w') as etf_wnfm:
        json.dump({'wn_metric_map': WN_METRICS, 'counter_enabled': True}, etf_wnfm)


def _add_job_state_checks(checks, feed, services):
    """Add the job-state checks for every ARC-CE, GLOBUS and HTCONDOR-CE service."""
    for service in services:
        host = service[0]
        flavor = service[1]
        if flavor not in ("ARC-CE", "GLOBUS", "HTCONDOR-CE"):
            continue

        if flavor == 'HTCONDOR-CE':
            # special handling for HTCONDOR-CE, no queues
            for metric in CE_STATE_METRICS:
                checks.add(metric, hosts=(host,),
                           params={'args': {'--resource': 'htcondor://%s' % host,
                                            '--jdl-ads': "'+maxMemory=2000'"}})
            continue

        # Guard once, before either branch below indexes FLAVOR_MAP.
        if flavor not in FLAVOR_MAP:
            log.warning("Unable to determine type for flavour %s", flavor)
            continue

        ce_resources = feed.get_ce_resources(host, flavor)
        if not ce_resources:
            # Nothing published for this CE: use placeholder batch/queue names.
            res = "%s://%s/%s/%s/%s" % (FLAVOR_MAP[flavor], host, 'nosched', 'nobatch', 'noqueue')
            for metric in CE_STATE_METRICS:
                checks.add(metric, hosts=(host,), params={'args': {'--resource': res}})
            continue

        # Only the first published (batch, queue) pair is used.
        batch = ce_resources[0][0] or "nopbs"
        queue = ce_resources[0][1]
        res = "%s://%s/%s/%s/%s" % (FLAVOR_MAP[flavor], host, 'nosched', batch, queue)

        if flavor == 'ARC-CE':
            args = {'--resource': res, '--arc-rsl': "'(memory=2000)'"}
            # special handling: native ARC client
            if host in _ARC_NATIVE_CLIENT_HOSTS:
                checks.add('org.sam.ARC-JobState-/cms/Role=lcgadmin', hosts=(host,),
                           params={'args': args})
                continue
        else:
            args = {'--resource': res}

        for metric in CE_STATE_METRICS:
            # Fresh dict per check so no two checks share an args mapping.
            checks.add(metric, hosts=(host,), params={'args': dict(args)})


def run(url, ipv6=False):
    """Generate hosts, host groups and checks from the CMS VO feed.

    Reads the services from the VO feed at ``url``, registers and serializes
    the hosts and their "CMS_Site" host groups, attaches the metric sets to
    their flavour tags, adds the per-host XRootD read and CE job-state checks,
    and writes the worker-node files ``/tmp/etf-env.sh`` and
    ``/tmp/etf_wnfm.json``.

    :param url: location of the VO feed, passed to ``VOFeed``.
    :param ipv6: when true the XRootD read checks get the ``-6`` argument,
        otherwise ``-4``.
    """
    log.info("Processing vo feed: %s", url)

    # Get services from the VO feed, i.e.
    # list of tuples (hostname, flavor, endpoint)
    feed = VOFeed(url)
    services = feed.get_services()

    # Add hosts, each tagged with corresponding flavors
    # creates /etc/ncgx/conf.d/generated_hosts.cfg
    h = Hosts()
    for service in services:
        h.add(service[0], tags=[service[1]])
    h.serialize()

    # Add host groups
    sites = feed.get_groups("CMS_Site")
    hg = Groups("host_groups")
    for site, hosts in sites.items():
        for host in hosts:
            hg.add(site, host)
    hg.serialize()

    # Add corresponding metrics to tags
    # creates /etc/ncgx/conf.d/generated_checks.cfg
    c = Checks()
    c.add_all(CE_METRICS, tags=["ARC-CE", "HTCONDOR-CE"])
    c.add_all(WN_METRICS.values(), tags=["ARC-CE", "HTCONDOR-CE"])
    c.add_all(SE_METRICS, tags=["SRM"])
    c.add_all(XROOT_METRICS, tags=["XROOTD"])
    # IPv6
    c.add("org.cms.DNS-IPv6", tags=["SRM"], params={'extends': 'check_dig'})
    # Special proxy
    c.add("org.globus.GridProxy-Get-/cms/Role=production", hosts=("localhost",))
    c.add("org.globus.GridProxy-Valid-/cms/Role=production", hosts=("localhost",))

    # XRoot
    _add_xrootd_read_checks(c, hg, services, ipv6)

    # Worker-node environment and metric map
    _write_worker_node_config()

    # Queues
    _add_job_state_checks(c, feed, services)

    c.serialize()