Commit 7f386bb0 authored by Andrew McNab's avatar Andrew McNab
Browse files

More GE

parent b186f8bb
......@@ -36,6 +36,7 @@ INSTALL_FILES=VERSION mjf.init \
mjf.sh.torque mjf.csh.torque mjf-get-total-cpu.torque \
mjf.sh.htcondor mjf.csh.htcondor mjf-get-total-cpu.htcondor \
mjf-job-wrapper make-jobfeatures.htcondor \
mjf.sh.gridengine mjf.csh.gridengine make-jobfeatures.gridengine \
mjf.sh.onlymf mjf.csh.onlymf \
DIRACbenchmark.py db12.init
......@@ -113,6 +114,24 @@ htcondor-rpm: mjf-scripts.tgz
--define "_topdir $(shell pwd)/RPMTMP" \
--buildroot $(shell pwd)/RPMTMP/BUILDROOT mjf-htcondor.spec
# Install the Grid Engine flavour of the MJF scripts under $(RPM_BUILD_ROOT):
# the sh/csh profile scripts are copied to etc/profile.d as mjf.sh and
# mjf.csh, and make-jobfeatures.gridengine is copied to usr/sbin as
# make-jobfeatures.
# Only usr/sbin is created here; etc/profile.d is presumably created by the
# "install" prerequisite - TODO confirm.
gridengine-install: $(INSTALL_FILES) install
mkdir -p $(RPM_BUILD_ROOT)/usr/sbin
cp mjf.sh.gridengine \
$(RPM_BUILD_ROOT)/etc/profile.d/mjf.sh
cp mjf.csh.gridengine \
$(RPM_BUILD_ROOT)/etc/profile.d/mjf.csh
cp make-jobfeatures.gridengine \
$(RPM_BUILD_ROOT)/usr/sbin/make-jobfeatures
# Build the Grid Engine RPM from mjf-scripts.tgz inside a throwaway RPMTMP
# tree: RPMTMP is wiped and recreated, the tarball is copied into
# RPMTMP/SOURCES, and "rpmbuild -ba" is run on mjf-gridengine.spec with
# _topdir and the buildroot both pointing inside RPMTMP.
# MJF_VERSION is exported for that rpmbuild invocation only (presumably it is
# read by the spec file - TODO confirm).
gridengine-rpm: mjf-scripts.tgz
rm -Rf RPMTMP
mkdir -p RPMTMP/SOURCES RPMTMP/SPECS RPMTMP/BUILD \
RPMTMP/SRPMS RPMTMP/RPMS/noarch RPMTMP/BUILDROOT
cp -f mjf-scripts.tgz RPMTMP/SOURCES
export MJF_VERSION=$(VERSION) ; rpmbuild -ba \
--define "_topdir $(shell pwd)/RPMTMP" \
--buildroot $(shell pwd)/RPMTMP/BUILDROOT mjf-gridengine.spec
db12-install: $(INSTALL_FILES)
mkdir -p $(RPM_BUILD_ROOT)/etc/rc.d/init.d \
$(RPM_BUILD_ROOT)/etc/db12 \
......
#!/usr/bin/python
#
# make-jobfeatures script for Machine/Job Features on Grid Engine
#
# Andrew McNab, University of Manchester.
# Copyright (c) 2016. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# o Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
# o Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Create $JOBFEATURES files following the Machine/Job Features
# specification in HSF-TN-2016-02
#
# This script creates the $JOBFEATURES directory at
# /tmp/mjf-$USER/jobfeatures-$JOB_ID
# and attempts to populate it from OS/batch information
# and from $MACHINEFEATURES=/etc/machinefeatures
#
# If the following variables are present in /var/run/mjf or
# /etc/sysconfig/mjf then they are used as defaults if not
# obtainable from Grid Engine:
#
# - allocated_cpu
# - wall_limit_secs
# - cpu_limit_secs
# - max_rss_bytes
# - max_swap_bytes
# - scratch_limit_bytes
# - hs06_job
# - db12_job
#
# Values in /var/run/mjf are preferred over /etc/sysconfig/mjf.
#
# The following per-cpu values in either mjf file will be used
# to calculate the corresponding per-job values if not given in
# either file or obtainable from Grid Engine:
#
# - cpu_limit_secs_per_cpu
# - max_rss_bytes_per_cpu
# - max_swap_bytes_per_cpu
# - scratch_limit_bytes_per_cpu
#
# Additionally you can set mjf_tmp_dir in those files to use a
# directory other than /tmp for the mjf-$USER directories.
#
import os
import re
import pwd
import sys
import stat
import time
# Everything created below is writable by the owner only and readable by
# everyone. 0o022 is the same mask as the old 0022 literal, spelled in the
# form accepted by Python 2.6+ and Python 3.
os.umask(0o022)

# The per-job directory is named after Grid Engine's $JOB_ID, so without
# it there is nothing useful this script can do.
try:
    job_id = os.environ['JOB_ID']
except KeyError:
    sys.stderr.write('Could not get Grid Engine $JOB_ID - exiting make-jobfeatures\n')
    sys.exit(10)

# Values in /var/run/mjf take precedence. A missing or unreadable file is
# not an error: it simply contributes no settings.
try:
    with open('/var/run/mjf', 'r') as mjfFile:
        fromRun = mjfFile.read()
except (IOError, OSError):
    fromRun = ''

# Also look in persistent /etc/sysconfig/mjf
try:
    with open('/etc/sysconfig/mjf', 'r') as mjfFile:
        fromSysconfig = mjfFile.read()
except (IOError, OSError):
    fromSysconfig = ''

# Base directory for the mjf-$USER directories. The first mjf_tmp_dir=
# line wins, and /var/run/mjf is searched first. Surrounding whitespace
# is dropped, and an empty value keeps the /tmp default rather than
# producing a path directly under "/".
mjfTmpDir = '/tmp'
matchObject = re.search("mjf_tmp_dir=(.*)", fromRun + '\n' + fromSysconfig)
if matchObject is not None:
    configuredTmpDir = matchObject.group(1).strip()
    if configuredTmpDir:
        mjfTmpDir = configuredTmpDir

# Prefer the account name; fall back to the numeric UID if the password
# database has no entry for it (pwd.getpwuid raises KeyError).
try:
    userName = pwd.getpwuid(os.getuid())[0]
except KeyError:
    userName = str(os.getuid())
# First make sure the mjfTmpDir/mjf-$USER directory exists
userDir = mjfTmpDir + '/mjf-' + userName

try:
    os.mkdir(userDir)
except OSError:
    # Ok if exists already; any other failure is detected by the stat below
    pass

# Diagnostics go to stderr, like the $JOB_ID error: the mjf.sh/mjf.csh
# profile scripts capture this script's stdout as $JOBFEATURES, so an error
# message printed there would end up as the value of that variable.
try:
    userDirStat = os.stat(userDir)
except OSError:
    sys.stderr.write(userDir + ' could not be created\n')
    sys.exit(12)

# Check it is owned by us
if userDirStat.st_uid != os.getuid() or userDirStat.st_gid != os.getgid():
    sys.stderr.write(userDir + ' has the wrong UID/GID\n')
    sys.exit(13)

# Ensure the right permissions (rwxr-xr-x)
os.chmod(userDir,
         stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

# Now the directory for this job
jobfeaturesDir = userDir + '/jobfeatures-' + job_id

if os.path.exists(jobfeaturesDir):
    # Silently exit if has been created already
    # NOTE(review): nothing is written to stdout on this path, so a caller
    # that captures the output as $JOBFEATURES gets an empty value when the
    # directory already exists - confirm this is intended.
    sys.exit(0)

try:
    os.mkdir(jobfeaturesDir)
except OSError:
    sys.stderr.write('Failed to create ' + jobfeaturesDir + '\n')
    sys.exit(14)

# Each job feature is a file named after its key, holding only the value
with open(jobfeaturesDir + '/job_id', 'w') as featureFile:
    featureFile.write(job_id)
# Integer job features collected so far, keyed by their file name in
# $JOBFEATURES. One CPU is assumed unless told otherwise below.
jobfeatures = {'allocated_cpu': 1}

# The job is taken to start when this script first runs for it
jobstart_secs = int(time.time())
with open(jobfeaturesDir + '/jobstart_secs', 'w') as featureFile:
    featureFile.write(str(jobstart_secs))

# /var/run/mjf comes first so that its values are found (and so preferred)
# before those in /etc/sysconfig/mjf
mjfText = fromRun + '\n' + fromSysconfig

# Get any defaults for these (integer) values
for key in ['allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
            'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes']:
    matchObject = re.search(key + "=([0-9]*)", mjfText)
    if matchObject is not None:
        try:
            jobfeatures[key] = int(matchObject.group(1))
        except ValueError:
            # "key=" with no digits after it: treat as not given
            pass

# Look for $NSLOTS for the number of CPUs allocated; this overrides any
# allocated_cpu default from the mjf files
try:
    jobfeatures['allocated_cpu'] = int(os.environ['NSLOTS'])
except (KeyError, ValueError):
    pass

# Look for any per-cpu values given in either file and scale them by the
# number of allocated CPUs, unless the per-job value is already known.
# The keys are the per-job names, so that scratch_limit_bytes_per_cpu
# produces scratch_limit_bytes (previously the key was 'scratch_limit',
# which matched nothing documented and was never written out).
for key in ['cpu_limit_secs', 'max_rss_bytes',
            'max_swap_bytes', 'scratch_limit_bytes']:
    if key in jobfeatures:
        continue
    matchObject = re.search(key + "_per_cpu=([0-9]*)", mjfText)
    if matchObject is not None:
        try:
            jobfeatures[key] = int(matchObject.group(1)) * jobfeatures['allocated_cpu']
        except ValueError:
            pass

#
# TODO: get jobfeatures['wall_limit_secs'] from Grid Engine here, somehow?
#

if 'cpu_limit_secs' not in jobfeatures and 'wall_limit_secs' in jobfeatures:
    # If not given in mjf files, we create a CPU seconds limit from wallclock
    # and allocated CPUs/processors
    jobfeatures['cpu_limit_secs'] = jobfeatures['wall_limit_secs'] * jobfeatures['allocated_cpu']

# Write out whichever of these have been set from the mjf files or environment
for key in ['allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
            'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes']:
    if key in jobfeatures:
        with open(jobfeaturesDir + '/' + key, 'w') as featureFile:
            featureFile.write(str(jobfeatures[key]))
# Try to get/calculate hs06_job: an explicit hs06_job= in the mjf files is
# used if present (/var/run/mjf is searched first), otherwise it is derived
# from the machine-wide values in /etc/machinefeatures.
hs06_job = None

matchObject = re.search("hs06_job=([0-9.]*)", fromRun + '\n' + fromSysconfig)
if matchObject is not None:
    try:
        hs06_job = float(matchObject.group(1))
    except ValueError:
        # Empty or malformed number: fall through to the calculation
        pass

if not hs06_job:
    # Missing, unreadable or unparsable machinefeatures files just mean
    # the value cannot be calculated
    try:
        with open('/etc/machinefeatures/hs06', 'r') as featureFile:
            hs06 = float(featureFile.readline())
    except (IOError, OSError, ValueError):
        hs06 = None

    try:
        with open('/etc/machinefeatures/total_cpu', 'r') as featureFile:
            total_cpu = int(featureFile.readline())
    except (IOError, OSError, ValueError):
        total_cpu = None

    if hs06 and total_cpu:
        # Simple pro-rata allocation of total hs06 depending on processors for this job
        hs06_job = (jobfeatures['allocated_cpu'] * hs06) / total_cpu

if hs06_job:
    # We got it from somewhere
    with open(jobfeaturesDir + '/hs06_job', 'w') as featureFile:
        featureFile.write('%.2f' % hs06_job)
# Try to get/calculate db12_job: an explicit db12_job= in the mjf files is
# used if present (/var/run/mjf is searched first), otherwise it is derived
# from the machine-wide values in /etc/machinefeatures.
db12_job = None

matchObject = re.search("db12_job=([0-9.]*)", fromRun + '\n' + fromSysconfig)
if matchObject is not None:
    try:
        db12_job = float(matchObject.group(1))
    except ValueError:
        # Empty or malformed number: fall through to the calculation
        pass

if not db12_job:
    # Missing, unreadable or unparsable machinefeatures files just mean
    # the value cannot be calculated
    try:
        with open('/etc/machinefeatures/db12', 'r') as featureFile:
            db12 = float(featureFile.readline())
    except (IOError, OSError, ValueError):
        db12 = None

    try:
        with open('/etc/machinefeatures/total_cpu', 'r') as featureFile:
            total_cpu = int(featureFile.readline())
    except (IOError, OSError, ValueError):
        total_cpu = None

    if db12 and total_cpu:
        # Simple pro-rata allocation of total db12 depending on processors for this job
        db12_job = (jobfeatures['allocated_cpu'] * db12) / total_cpu

if db12_job:
    # We got it from somewhere
    with open(jobfeaturesDir + '/db12_job', 'w') as featureFile:
        featureFile.write('%.2f' % db12_job)
# Emit the $JOBFEATURES directory path on stdout, followed by a newline,
# so that a calling script can pick it up
sys.stdout.write(jobfeaturesDir + '\n')
\ No newline at end of file
......@@ -56,7 +56,7 @@
#
# The following per-cpu values in either mjf file will be used
# to calculate the corresponding per-job values if not given in
# either file or obtainable from PBS/Torque:
# either file or obtainable from HTCondor:
#
# - cpu_limit_secs_per_cpu
# - max_rss_bytes_per_cpu
......
# Machine/Job Features environment setup for csh-family login shells on
# Grid Engine: sets $MACHINEFEATURES if the directory exists and sets
# $JOBFEATURES to whatever /usr/sbin/make-jobfeatures prints.

# Default; csh needs "set" for a shell variable assignment
set mjf_tmp_dir=/tmp

# NOTE(review): these files are also sourced by the sh profile script and
# parsed as key=value by make-jobfeatures - confirm their contents are
# acceptable to csh.
if ( -r /etc/sysconfig/mjf ) then
    # Source the same file that was just tested for readability
    source /etc/sysconfig/mjf
endif

# Read last so that values in /var/run/mjf take precedence
if ( -r /var/run/mjf ) then
    source /var/run/mjf
endif

if ( -d /etc/machinefeatures ) then
    setenv MACHINEFEATURES /etc/machinefeatures
endif

setenv JOBFEATURES `/usr/sbin/make-jobfeatures`
# Machine/Job Features environment setup for sh-family login shells on
# Grid Engine: exports $MACHINEFEATURES if the directory exists and exports
# $JOBFEATURES as whatever /usr/sbin/make-jobfeatures prints.

mjf_tmp_dir=/tmp

if [ -r /etc/sysconfig/mjf ] ; then
    . /etc/sysconfig/mjf
fi

# Read last so that values in /var/run/mjf take precedence. This used to
# source /etc/sysconfig/mjf a second time, so /var/run/mjf was never read.
if [ -r /var/run/mjf ] ; then
    . /var/run/mjf
fi

if [ -d /etc/machinefeatures ] ; then
    export MACHINEFEATURES=/etc/machinefeatures
fi

# Quoted so that a path containing whitespace is not split
export JOBFEATURES="$(/usr/sbin/make-jobfeatures)"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment