From d5c0948d14cdcef0f208ea91c36bf17c0d948813 Mon Sep 17 00:00:00 2001 From: Nikos Tsipinakis <nikos@tsipinakis.com> Date: Thu, 21 Jul 2022 11:53:35 +0200 Subject: [PATCH] [Global] Sync config with ITB --- global/frontend.xml | 163 +++++++------------------------------------- 1 file changed, 23 insertions(+), 140 deletions(-) diff --git a/global/frontend.xml b/global/frontend.xml index cbc93927..0a612210 100644 --- a/global/frontend.xml +++ b/global/frontend.xml @@ -1,7 +1,5 @@ <frontend advertise_delay="1" advertise_with_multiple="True" advertise_with_tcp="True" downtimes_file="frontenddowntime" frontend_monitor_index_page="True" frontend_name="CMSG-v1_0" frontend_versioning="False" group_parallel_workers="2" loop_delay="120" restart_attempts="3" restart_interval="1800"> <config> - <idle_vms_total curb="20000" max="22000"/> - <idle_vms_total_global curb="20000" max="22000"/> <running_glideins_total curb="200000" max="220000"/> <running_glideins_total_global curb="200000" max="220000"/> </config> @@ -18,7 +16,7 @@ </process_logs> </log_retention> <match match_expr='__import__("CMSGlobalMatcher").match(job, glidein)' start_expr='ifthenelse(GLIDEIN_REQUIRED_OS=?="any", (HAS_SINGULARITY=?=true && GLIDEIN_PS_HAS_SINGULARITY=!=false),( isUndefined(REQUIRED_OS) || REQUIRED_OS=?="any" || REQUIRED_OS=?=GLIDEIN_REQUIRED_OS )) && ifthenelse(MaxWallTimeMins=!=UNDEFINED,(MaxWallTimeMins*60)<(GLIDEIN_ToDie-MyCurrentTime),(16*3600)<(GLIDEIN_ToDie-MyCurrentTime)) && ((DynamicSlot =!= true) || (RequestCpus=?=Cpus)) && ifthenelse(SlotType=?="Static", RequestCpus <= Cpus, True)'> - <factory query_expr='(stringListMember("CMS",GLIDEIN_Supported_VOs) || stringListMember("CMSGPU",GLIDEIN_Supported_VOs)) && (GLIDEIN_Max_Walltime=!=UNDEFINED) && (GLIDEIN_Retire_Time_Spread=!=UNDEFINED) && (GLIDEIN_MaxMemMBs=!=UNDEFINED) && (GLIDEIN_CMSSite=!=UNDEFINED) && (!stringListsIntersect("T2_CH_CERN_AI,T2_CH_CERN_HLT,T0_CH_CERN,T3_CH_CERN_CAF,T2_CH_CERN",GLIDEIN_CMSSite))'> + <factory query_expr='(stringListMember("CMS",GLIDEIN_Supported_VOs) || stringListMember("CMSGPU",GLIDEIN_Supported_VOs)) && (GLIDEIN_Max_Walltime=!=UNDEFINED) && (GLIDEIN_Retire_Time_Spread=!=UNDEFINED) && (GLIDEIN_MaxMemMBs=!=UNDEFINED) && (GLIDEIN_CMSSite=!=UNDEFINED) && (!stringListsIntersect("T2_CH_CERN_AI,T2_CH_CERN_HLT,T0_CH_CERN",GLIDEIN_CMSSite))'> <match_attrs> <match_attr name="GLIDEIN_Job_Min_Time" type="int"/> <match_attr name="GLIDEIN_MaxMemMBs" type="int"/> @@ -108,11 +106,8 @@ <attr name="CMS_GLIDEIN_VERSION" glidein_publish="True" job_publish="True" parameter="True" type="int" value="19"/> <attr name="CONDOR_VERSION" glidein_publish="False" job_publish="False" parameter="True" type="string" value="9.0.7"/> <attr name="CONDOR_OS" glidein_publish="False" job_publish="False" parameter="True" type="string" value="auto"/> - <attr name="GLEXEC_HOLD_ON_INITIAL_FAILURE" glidein_publish="True" job_publish="False" parameter="False" type="int" value="0"/> - <attr name="GLEXEC_RETRY_DELAY" glidein_publish="True" job_publish="False" parameter="False" type="int" value="30"/> - <attr name="GLIDEIN_CLAIM_WORKLIFE_DYNAMIC" glidein_publish="True" job_publish="True" parameter="True" type="expr" value="cpus*(6*3600)"/> + <attr name="GLIDEIN_CLAIM_WORKLIFE_DYNAMIC" comment="Allowing slots to be reused" glidein_publish="True" job_publish="True" parameter="True" type="expr" value="cpus*(6*3600)"/> <attr name="GLIDEIN_Expose_Grid_Env" glidein_publish="True" job_publish="True" parameter="False" type="string" value="True"/> - <attr name="GLIDEIN_Glexec_Use" comment="This will use glexec wherever available" glidein_publish="False" job_publish="False" parameter="True" type="string" value="OPTIONAL"/> <attr name="GLIDEIN_Job_Max_Time" glidein_publish="True" job_publish="True" parameter="True" type="int" value="14400"/> <attr name="GLIDEIN_Max_Idle" glidein_publish="True" job_publish="True" parameter="True" type="int" value="600"/> <attr name="GLIDEIN_Max_Tail" glidein_publish="True" job_publish="True" parameter="True" type="int" value="600"/> @@ -126,8 +121,6 @@ <attr name="CVMFS_REPOS_LIST" glidein_publish="True" job_publish="True" parameter="False" type="string" value="cms.cern.ch,oasis.opensciencegrid.org,singularity.opensciencegrid.org"/> <attr name="GLIDEIN_Singularity_Use" glidein_publish="True" job_publish="True" parameter="True" type="string" value="REQUIRED"/> <attr name="SINGULARITY_IMAGES_DICT" glidein_publish="True" job_publish="True" parameter="False" type="string" value="default:/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel7,rhel6:/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel6,rhel7:/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel7,rhel8:/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel8"/> - - <attr name="SLOTS_LAYOUT" glidein_publish="True" job_publish="True" parameter="False" type="string" value="partitionable"/> <attr name="UPDATE_INTERVAL" glidein_publish="True" job_publish="False" parameter="False" type="expr" value="$RANDOM_INTEGER(680,750,1)"/> <attr name="USE_MATCH_AUTH" glidein_publish="False" job_publish="False" parameter="True" type="string" value="True"/> @@ -223,9 +216,7 @@ <config> <glideins_removal margin="5" requests_tracking="True" type="IDLE" wait="0"/> <idle_glideins_lifetime max="86400"/> - <idle_glideins_per_entry max="50" reserve="5"/> - <idle_vms_per_entry curb="100" max="200"/> - <idle_vms_total curb="3500" max="4000"/> + <idle_glideins_per_entry max="100" reserve="5"/> <processing_workers matchmakers="3"/> <running_glideins_per_entry max="12000" min="0" relative_to_queue="1.05"/> <running_glideins_total curb="11000" max="12000"/> @@ -314,11 +305,29 @@ <security proxy_selection_plugin="ProxyAll"> <credentials> <credential absfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="grid" type="grid_proxy"/> - <credential absfname="/var/lib/gwms-frontend/tokens.d/cern-itb.scitoken" security_class="frontend" trust_domain="grid" type="scitoken"/> + <credential absfname="/var/lib/gwms-frontend/tokens.d/cern.scitoken" security_class="frontend" trust_domain="grid" type="scitoken"/> + </credentials> + </security> + <attrs> + <attr name="GLIDEIN_Singularity_Use" glidein_publish="True" job_publish="True" parameter="True" type="string" value="OPTIONAL"/> + <attr name="GLIDEIN_CustomizeStart_Entrylist" glidein_publish="True" job_publish="False" parameter="True" type="string" value="CMSHTPC_T1_IT_CNAF_CINECA"/> + </attrs> + </group> + <group name="main-vacuum-gpu" comment="This group is reserved for generating GPU glidein_startup.sh wrappers" enabled="True"> + <match match_expr='False' start_expr="( ifthenelse(DESIRED_Sites isnt undefined, stringListMember(GLIDEIN_CMSSite,DESIRED_Sites), undefined) || ifthenelse(DESIRED_Gatekeepers isnt undefined, stringListMember(GLIDEIN_Gatekeeper,DESIRED_Gatekeepers), undefined) )"> + <factory query_expr='stringListsIntersect("T1_IT_CNAF",GLIDEIN_CMSSite) && stringListMember("CMSGPU",GLIDEIN_Supported_VOs)'> <!-- That's only used for Marconi100 for now --> + </factory> + <job query_expr='False'> + </job> + </match> + <security proxy_selection_plugin="ProxyAll"> + <credentials> + <credential absfname="/var/lib/gwms-frontend/.globus/x509_cmspilot04_cms_pilot.proxy" security_class="cmspilot" trust_domain="grid" type="grid_proxy"/> </credentials> </security> <attrs> - <attr name="GLIDEIN_Singularity_Use" glidein_publish="True" job_publish="True" parameter="True" type="string" value="REQUIRED"/> + <attr name="ALL_DEBUG" glidein_publish="True" job_publish="False" parameter="False" type="expr" value="D_ALL"/> + <attr name="GLIDEIN_Singularity_Use" glidein_publish="True" job_publish="True" parameter="True" type="string" value="OPTIONAL"/> <attr name="GLIDEIN_CustomizeStart_Entrylist" glidein_publish="True" job_publish="False" parameter="True" type="string" value="CMSHTPC_T1_IT_CNAF_CINECA"/> </attrs> </group> @@ -327,8 +336,6 @@ <glideins_removal margin="5" requests_tracking="True" type="IDLE" wait="0"/> <idle_glideins_lifetime max="82800"/> <idle_glideins_per_entry max="100" reserve="3"/> - <idle_vms_per_entry curb="250" max="330"/> - <idle_vms_total curb="11000" max="13000"/> <processing_workers matchmakers="3"/> <running_glideins_per_entry max="15000" min="0" relative_to_queue="1.05"/> <running_glideins_total curb="167000" max="170000"/> @@ -362,45 +369,6 @@ <files> </files> </group> - <group name="opportunistic_conservative" comment="This group represents the allocation based opportunistic resources" enabled="False"> - <config> - <glideins_removal margin="0" requests_tracking="False" type="NO" wait="0"/> - <idle_glideins_lifetime max="0"/> - <idle_glideins_per_entry max="800" reserve="2"/> - <idle_vms_per_entry curb="800" max="820"/> - <idle_vms_total curb="800" max="820"/> - <processing_workers matchmakers="1"/> - <running_glideins_per_entry max="800" min="0" relative_to_queue="1.05"/> - <running_glideins_total curb="800" max="820"/> - </config> - <match match_expr='(((glidein["attrs"].get("GLIDEIN_CMSSite") in job["DESIRED_Sites"].split(",")) if (job.has_key("DESIRED_Sites")) else False))' start_expr="(ifthenelse(DESIRED_Sites isnt undefined, stringListMember(GLIDEIN_CMSSite,DESIRED_Sites), undefined)) && (ifthenelse(WMAgent_AgentName isnt undefined, WMAgent_AgentName=!=UNDEFINED, undefined)) && ( isUndefined(RequestGPUs) || RequestGPUs =?= 0 )"> - <factory query_expr='stringListsIntersect("T3_US_NERSC,T3_US_NERSC_Cori",GLIDEIN_CMSSite) && stringListMember("CMS",GLIDEIN_Supported_VOs)'> - <match_attrs> - <match_attr name="GLIDEIN_CMSSite" type="string"/> - </match_attrs> - <collectors> - </collectors> - </factory> - <job query_expr='(WMAgent_AgentName=!=UNDEFINED) && stringListsIntersect("T3_US_NERSC,T3_US_NERSC_Cori",DESIRED_Sites) && ( isUndefined(RequestGPUs) || RequestGPUs =?= 0 )'> - <match_attrs> - <match_attr name="DESIRED_Sites" type="string"/> - <match_attr name="WMAgent_AgentName" type="string"/> - </match_attrs> - <schedds> - </schedds> - </job> - </match> - <security proxy_selection_plugin="ProxyAll"> - <credentials> - <credential absfname="/var/lib/gwms-frontend/.globus/cmsbosco_id_rsa.pub" keyabsfname="/var/lib/gwms-frontend/.globus/cmsbosco_id_rsa" pilotabsfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="bosco" type="key_pair"/> - <credential absfname="/var/lib/gwms-frontend/.globus/cmsbosco_id_rsa.gordon.pub" keyabsfname="/var/lib/gwms-frontend/.globus/cmsbosco_id_rsa.gordon" pilotabsfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="bosco.gordon" type="key_pair"/> - </credentials> - </security> - <attrs> - </attrs> - <files> - </files> - </group> <group name="overflow" enabled="True"> <config> <glideins_removal margin="5" requests_tracking="True" type="IDLE" wait="0"/> @@ -436,7 +404,6 @@ <credentials> <credential absfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="grid" type="grid_proxy"/> <credential absfname="/var/lib/gwms-frontend/tokens.d/cern.scitoken" security_class="frontend" trust_domain="grid" type="scitoken"/> - </credentials> </security> <attrs> @@ -491,8 +458,6 @@ <glideins_removal margin="5" requests_tracking="True" type="IDLE" wait="0"/> <idle_glideins_lifetime max="82800"/> <idle_glideins_per_entry max="10" reserve="2"/> - <idle_vms_per_entry curb="10" max="20"/> - <idle_vms_total curb="20" max="30"/> <processing_workers matchmakers="1"/> <running_glideins_per_entry max="10" min="0" relative_to_queue="1.05"/> <running_glideins_total curb="40" max="50"/> @@ -528,88 +493,6 @@ <files> </files> </group> - <group name="tier2_ioslots" enabled="False"> - <config> - <glideins_removal margin="0" requests_tracking="False" type="NO" wait="0"/> - <idle_glideins_lifetime max="3600"/> - <idle_glideins_per_entry max="600" reserve="10"/> - <idle_vms_per_entry curb="100" max="200"/> - <idle_vms_total curb="3500" max="4000"/> - <processing_workers matchmakers="2"/> - <running_glideins_per_entry max="15000" min="0" relative_to_queue="1.05"/> - <running_glideins_total curb="70000" max="75000"/> - </config> - <match match_expr='(((glidein["attrs"].get("GLIDEIN_CMSSite") in job["DESIRED_Sites"].split(",")) if (job.has_key("DESIRED_Sites")) else False) or ((glidein["attrs"].get("GLIDEIN_Gatekeeper") in job["DESIRED_Gatekeepers"].split(",")) if (job.has_key("DESIRED_Gatekeepers")) else False))' start_expr="(ifthenelse(DESIRED_Sites isnt undefined, stringListMember(GLIDEIN_CMSSite,DESIRED_Sites), undefined) || ifthenelse(DESIRED_Gatekeepers isnt undefined, stringListMember(GLIDEIN_Gatekeeper,DESIRED_Gatekeepers), undefined)) && ( isUndefined(RequestGPUs) || RequestGPUs =?= 0 )"> - <factory query_expr='stringListsIntersect("T2_US_Nebraska",GLIDEIN_CMSSite) && stringListMember("CMS",GLIDEIN_Supported_VOs)'> - <match_attrs> - <match_attr name="GLIDEIN_CMSSite" type="string"/> - <match_attr name="GLIDEIN_Gatekeeper" type="string"/> - </match_attrs> - <collectors> - </collectors> - </factory> - <job query_expr='stringListsIntersect("T2_US_Nebraska",DESIRED_Sites) && ( isUndefined(RequestGPUs) || RequestGPUs =?= 0 )'> - <match_attrs> - <match_attr name="CRAB_UserRole" type="string"/> - <match_attr name="DESIRED_Gatekeepers" type="string"/> - <match_attr name="DESIRED_Sites" type="string"/> - <match_attr name="WMAgent_AgentName" type="string"/> - </match_attrs> - <schedds> - </schedds> - </job> - </match> - <security> - <credentials> - <credential absfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="grid" type="grid_proxy"/> - </credentials> - </security> - <attrs> - <attr name="GLIDEIN_Resource_Slots" glidein_publish="True" job_publish="True" parameter="True" type="string" value="Ioslots,1,1024,static"/> - </attrs> - <files> - </files> - </group> - <group name="custom_start" comment="This group is for sites where we want to customize pilots" enabled="False"> - <config> - <glideins_removal margin="5" requests_tracking="True" type="IDLE" wait="0"/> - <idle_glideins_lifetime max="82800"/> - <idle_glideins_per_entry max="100" reserve="3"/> - <idle_vms_per_entry curb="250" max="330"/> - <idle_vms_total curb="11000" max="13000"/> - <processing_workers matchmakers="3"/> - <running_glideins_per_entry max="15000" min="0" relative_to_queue="1.05"/> - <running_glideins_total curb="200000" max="220000"/> - </config> - <match match_expr='(((glidein["attrs"].get("GLIDEIN_CMSSite") in job["DESIRED_Sites"].split(",")) if (job.has_key("DESIRED_Sites")) else False) or ((glidein["attrs"].get("GLIDEIN_Gatekeeper") in job["DESIRED_Gatekeepers"].split(",")) if (job.has_key("DESIRED_Gatekeepers")) else False))' start_expr="(ifthenelse(DESIRED_Sites isnt undefined, stringListMember(GLIDEIN_CMSSite,DESIRED_Sites), undefined) || ifthenelse(DESIRED_Gatekeepers isnt undefined, stringListMember(GLIDEIN_Gatekeeper,DESIRED_Gatekeepers), undefined)) && (isUndefined(RequestGPUs) || RequestGPUs =?= 0 )"> - <factory query_expr='(stringListsIntersect("T2_US_Caltech",GLIDEIN_CMSSite) || stringListsIntersect("CMSHTPC_T1_IT_CNAF_CINECA,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-1-kit,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-1-kit-short,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-1-kit-medium,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-2-kit,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-2-kit-short,CMSHTPC_T1_DE_KIT_cloud-htcondor-ce-2-kit-medium",EntryName)) && stringListMember("CMS",GLIDEIN_Supported_VOs)'> - <match_attrs> - <match_attr name="GLIDEIN_CMSSite" type="string"/> - <match_attr name="GLIDEIN_Gatekeeper" type="string"/> - </match_attrs> - <collectors> - </collectors> - </factory> - <job query_expr="( isUndefined(RequestGPUs) || RequestGPUs =?= 0 )"> - <match_attrs> - <match_attr name="DESIRED_Gatekeepers" type="string"/> - <match_attr name="DESIRED_Sites" type="string"/> - </match_attrs> - <schedds> - </schedds> - </job> - </match> - <security proxy_selection_plugin="ProxyAll"> - <credentials> - <credential absfname="/var/lib/gwms-frontend/.globus/x509_cmspilot02_cms_pilot.proxy" security_class="cmspilot" trust_domain="grid" type="grid_proxy"/> - </credentials> - </security> - <attrs> - <attr name="GLIDEIN_CustomizeStart" glidein_publish="True" job_publish="False" parameter="True" type="string" value="True"/> - </attrs> - <files> - </files> - </group> </groups> <ccbs> <ccb DN="/DC=ch/DC=cern/OU=computers/CN=vocms0813.cern.ch" group="ccb1" node="vocms0813.cern.ch:9618?sock=collector9621-9720"/> -- GitLab