import logging
import json

from ncgx.inventory import Hosts, Checks, Groups
from vofeed.api import VOFeed

log = logging.getLogger('ncgx')

# Scheme prefix of the resource URI built for each CE flavor.
FLAVOR_MAP = {
    'CREAM-CE': 'cream',
    'ARC-CE': 'arc',
    'HTCONDOR-CE': 'condor',
    'GLOBUS': 'gt',
    'OSG-CE': 'gt',
}

# Metrics added per CE host with a host-specific --resource argument.
CE_STATE_METRICS = ['org.sam.CONDOR-JobState-/cms/Role=lcgadmin']

# Metrics attached to the CE flavor tags.
CE_METRICS = ('org.sam.CONDOR-JobSubmit-/cms/Role=lcgadmin',)

# Metrics attached to the "SRM" tag.
SE_METRICS = (
    'org.cms.SRM-AllCMS-/cms/Role=production',
    'org.cms.SRM-GetPFNFromTFC-/cms/Role=production',
    'org.cms.SRM-VODel-/cms/Role=production',
    'org.cms.SRM-VOGet-/cms/Role=production',
    'org.cms.SRM-VOGetTURLs-/cms/Role=production',
    'org.cms.SRM-VOLs-/cms/Role=production',
    'org.cms.SRM-VOLsDir-/cms/Role=production',
    'org.cms.SRM-VOPut-/cms/Role=production',
)

# Metrics attached to the "XROOTD" tag.
XROOT_METRICS = (
    'org.cms.SE-xrootd-contain',
    'org.cms.SE-xrootd-connection',
    'org.cms.SE-xrootd-version',
)

# Worker-node test script name -> metric name. The insertion order is the
# order in which the tests are listed in ETF_TESTS.
WN_METRICS = {
    'WN-cvmfs': 'org.cms.WN-cvmfs-/cms/Role=lcgadmin',
    'CE-cms-analysis.sing': 'org.cms.WN-analysis-/cms/Role=lcgadmin',
    'CE-cms-singularity': 'org.cms.WN-isolation-/cms/Role=lcgadmin',
    'CE-cms-basic': 'org.cms.WN-basic-/cms/Role=lcgadmin',
    'CE-cms-env': 'org.cms.WN-env-/cms/Role=lcgadmin',
    'CE-cms-frontier.sing': 'org.cms.WN-frontier-/cms/Role=lcgadmin',
    'CE-cms-mc.sing': 'org.cms.WN-mc-/cms/Role=lcgadmin',
    'CE-cms-squid.sing': 'org.cms.WN-squid-/cms/Role=lcgadmin',
    'CE-cms-xrootd-access.sing': 'org.cms.WN-xrootd-access-/cms/Role=lcgadmin',
    'CE-cms-xrootd-fallback.sing': 'org.cms.WN-xrootd-fallback-/cms/Role=lcgadmin',
}

# legacy SFT/SAME metrics which are not nagios compliant
# (selected as every WN_METRICS key containing 'CE')
WN_METRICS_LEGACY = [name for name in WN_METRICS if 'CE' in name]


def _add_xrootd_read_checks(checks, services, host_groups, ipv6):
    """Add one org.cms.SE-xrootd-read check per XROOTD service.

    Hosts for which ``host_groups.exact_match`` returns nothing are skipped
    with a warning. ``ipv6`` selects the '-6' argument instead of '-4'.
    """
    ip_flag = '-6' if ipv6 else '-4'
    for entry in services:
        if entry[1] != "XROOTD":
            continue
        host = entry[0]
        endpoint = entry[2]
        matched_sites = host_groups.exact_match(host)
        if not matched_sites:
            log.warning("Unable to find site for host %s, skipping" % host)
            continue
        if len(matched_sites) > 1:
            log.warning("Host assigned to multiple sites: %s, %s picking first one" % (host, matched_sites))
        # NOTE(review): pop() decides which site is used; which element that
        # is depends on the collection type exact_match returns -- confirm.
        read_args = {'--site': matched_sites.pop(), '--endpoint': endpoint, ip_flag: ''}
        checks.add("org.cms.SE-xrootd-read", hosts=(host,),
                   params={'args': read_args, '_tags': 'XROOTD'})


def _write_etf_env():
    """Write /tmp/etf-env.sh with the worker-node environment variables."""
    tests_dir = 'etf/probes/org.cms/testjob/tests/'
    env_lines = [
        'ETF_TESTS={}\n'.format(','.join([tests_dir + name for name in WN_METRICS])),
        'ETF_LEGACY={}\n'.format(','.join([tests_dir + name for name in WN_METRICS_LEGACY])),
        # Disabled in the original code (SAME_CODES is not defined here):
        # for code, value in SAME_CODES.items():
        #     etf_env.write('SAME_{}={}\n'.format(code, value))
        'SAME_VO=cms\n',
        'SAME_TEST_DIRNAME=$ETFROOT/probes/org.cms/testjob/tests\n',
        'SAME_SENSOR_HOME=$ETFROOT/probes/org.cms/testjob\n',
    ]
    with open('/tmp/etf-env.sh', 'w') as etf_env:
        etf_env.writelines(env_lines)


def _add_ce_state_checks(checks, feed, services):
    """Add the CE_STATE_METRICS checks for every CE service in ``services``.

    HTCONDOR-CE hosts get an ``htcondor://<host>`` resource. For the other
    CE flavors the resource is built from the first entry returned by
    ``feed.get_ce_resources``, or from placeholders when that is empty.
    """
    for entry in services:
        host = entry[0]
        flavor = entry[1]
        if flavor not in ("CREAM-CE", "ARC-CE", "GLOBUS", "HTCONDOR-CE"):
            continue

        if flavor == 'HTCONDOR-CE':
            # special handling for HTCONDOR-CE, no queues
            for metric in CE_STATE_METRICS:
                checks.add(metric, hosts=(host,),
                           params={'args': {'--resource': 'htcondor://%s' % host,
                                            '--jdl-ads': '\'+maxMemory=2000\''}})
            continue

        ce_resources = feed.get_ce_resources(host, flavor)
        if not ce_resources:
            # Nothing known about batch system or queue: use placeholders.
            res = "%s://%s/%s/%s/%s" % (FLAVOR_MAP[flavor], host, 'nosched', 'nobatch', 'noqueue')
            for metric in CE_STATE_METRICS:
                checks.add(metric, hosts=(host,), params={'args': {'--resource': res}})
            continue

        # Only the first (batch, queue) entry is used.
        batch = ce_resources[0][0] or "nopbs"
        queue = ce_resources[0][1]
        if flavor not in FLAVOR_MAP:
            log.warning("Unable to determine type for flavour %s" % flavor)
            continue
        res = "%s://%s/%s/%s/%s" % (FLAVOR_MAP[flavor], host, 'nosched', batch, queue)
        for metric in CE_STATE_METRICS:
            # A fresh dict per check, as each add() call gets its own params.
            state_args = {'--resource': res}
            if flavor == 'CREAM-CE':
                state_args['--jdl-ads'] = '\'cream_attributes=CERequirements=\"other.GlueHostMainMemoryRAMSize==2000\"\''
            checks.add(metric, hosts=(host,), params={'args': state_args})


def run(url, ipv6=False):
    """Build the hosts, host groups and checks configuration from a VO feed.

    Args:
        url: location of the VO feed, passed to ``VOFeed``.
        ipv6: when true, the xrootd read checks get '-6' instead of '-4'.

    Side effects: serializes the Hosts, Groups and Checks inventories and
    writes /tmp/etf-env.sh and /tmp/etf_wnfm.json.
    """
    log.info("Processing vo feed: %s" % url)

    # Get services from the VO feed, i.e
    # list of tuples (hostname, flavor, endpoint)
    feed = VOFeed(url)
    services = feed.get_services()

    # Add hosts, each tagged with corresponding flavors
    # creates /etc/ncgx/conf.d/generated_hosts.cfg
    inventory = Hosts()
    for entry in services:
        inventory.add(entry[0], tags=[entry[1]])
    inventory.serialize()

    # Add host groups
    host_groups = Groups("host_groups")
    for site_name, members in feed.get_groups("CMS_Site").items():
        for member in members:
            host_groups.add(site_name, member)
    host_groups.serialize()

    # Add corresponding metrics to tags
    # creates /etc/ncgx/conf.d/generated_checks.cfg
    ce_tags = ("CREAM-CE", "ARC-CE", "HTCONDOR-CE")
    checks = Checks()
    checks.add_all(CE_METRICS, tags=list(ce_tags))
    checks.add_all(WN_METRICS.values(), tags=list(ce_tags))
    checks.add_all(SE_METRICS, tags=["SRM"])
    checks.add_all(XROOT_METRICS, tags=["XROOTD"])

    # IPv6
    checks.add("org.cms.DNS-IPv6", tags=["SRM"], params={'extends': 'check_dig'})

    # Special proxy
    for proxy_metric in ("org.globus.GridProxy-Get-/cms/Role=production",
                         "org.globus.GridProxy-Valid-/cms/Role=production"):
        checks.add(proxy_metric, hosts=("localhost",))

    # XRoot
    _add_xrootd_read_checks(checks, services, host_groups, ipv6)

    # ETF env - environment variables to export on the worker node (global for all sites), such as:
    # ETF_TESTS - points to a list of WN tests to execute (stored in WN_METRICS)
    # ETF_LEGACY should be a subset of ETF_TESTS that identifies SFT tests (those that are not nagios compliant)
    # SAME* environment needed by the legacy/SFT tests
    _write_etf_env()

    # ETF WN-qFM config - maps WN scripts back to metrics
    # (WN-cvmfs -> org.cms.WN-cvmfs-/cms/Role=lcgadmin)
    with open('/tmp/etf_wnfm.json', 'w') as etf_wnfm:
        json.dump({'wn_metric_map': WN_METRICS, 'counter_enabled': True}, etf_wnfm)

    # Queues
    _add_ce_state_checks(checks, feed, services)

    checks.serialize()