Commit f897a5b6 authored by Andrew McNab's avatar Andrew McNab
Browse files

Fill out prologue.user and add epilogue.user

parent 22f19994
......@@ -31,7 +31,7 @@
include VERSION
INSTALL_FILES=VERSION prologue.user mjf.init mjf.sh mjf.csh
INSTALL_FILES=VERSION prologue.user epilogue.user mjf.init mjf.sh mjf.csh
TGZ_FILES=$(INSTALL_FILES) Makefile mjf-torque.spec
......@@ -46,7 +46,7 @@ install: $(INSTALL_FILES)
mkdir -p $(RPM_BUILD_ROOT)/var/lib/torque/mom_priv \
$(RPM_BUILD_ROOT)/etc/rc.d/init.d \
$(RPM_BUILD_ROOT)/etc/profile.d
cp prologue.user \
cp prologue.user epilogue.user \
$(RPM_BUILD_ROOT)/var/lib/torque/mom_priv
cp mjf.init \
$(RPM_BUILD_ROOT)/etc/rc.d/init.d/mjf
......
#!/bin/sh
#
# epilogue.user script for Machine/Job Features on Torque/PBS
#
# Andrew McNab, University of Manchester.
# Copyright (c) 2016. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# o Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
# o Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Remove the $JOBFEATURES directory for this job, i.e. the directory
# at $HOME/jobfeatures-$PBS_JOBID.
#
# Nothing is removed when PBS_JOBID is empty or the directory does not
# exist. The two conditions are separate [ ] tests joined with && because
# the -a operator of test is obsolescent in POSIX and its result is
# unspecified once more than four arguments are involved.
if [ -n "$PBS_JOBID" ] && [ -d "$HOME/jobfeatures-$PBS_JOBID" ] ; then
    rm -Rf "$HOME/jobfeatures-$PBS_JOBID"
fi
......@@ -30,6 +30,25 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# Create $JOBFEATURES files following the Machine/Job Features
# specification in HSF-TN-2016-02
#
# This script creates the $JOBFEATURES directory at
# $HOME/jobfeatures-$PBS_JOBID and attempts to populate it
# from Torque/PBS information and from
# $MACHINEFEATURES=/etc/machinefeatures
#
# If the following variables are present in /var/run/mjf or
# /etc/sysconfig/mjf then they are preferred:
#
# - hs06_job
# - allocated_cpu
# - wall_limit_secs
# - cpu_limit_secs
# - max_rss_bytes
# - max_swap_bytes
# - scratch_limit_bytes
#
import os
......@@ -37,27 +56,23 @@ import re
import sys
import time
#
# PROGRAM MAIN !!!
#
homdeDir = os.path.expanduser('~')
homeDir = os.path.expanduser('~')
job_id = os.environ['PBS_JOBID']
jobfeaturesDir = homeDir + '/jobfeatures-' + jobID
jobfeaturesDir = homeDir + '/jobfeatures-' + job_id
os.mkdir(jobfeaturesDir)
open(jobfeaturesDir + '/job_id', 'w').write(job_id)
jobfeatures = {}
try:
allocated_cpu = int(os.environ['PBS_NUM_PPN'])
jobfeatures['allocated_cpu'] = int(os.environ['PBS_NUM_PPN'])
except:
allocated_cpu = 1
jobfeatures['allocated_cpu'] = 1
open(jobfeaturesDir + '/job_id', 'w').write(str(allocated_cpu))
# allocated_cpu gets its own file: writing it to 'job_id' would replace the
# job identifier stored in that file with the CPU count.
open(jobfeaturesDir + '/allocated_cpu', 'w').write(str(jobfeatures['allocated_cpu']))
try:
hs06 = float(open('/etc/machinefeatures/hs06','r').readline())
......@@ -71,14 +86,88 @@ except:
if hs06 and total_cpu:
# Simple pro-rata allocation of total hs06 depending on processors for this job
hs06_job = (allocated_cpu * hs06) / total_cpu
open(jobfeaturesDir + '/hs06_job', 'w').write(str(hs06_job))
hs06_job = (jobfeatures['allocated_cpu'] * hs06) / total_cpu
else:
hs06_job = None
jobstart_secs = int(time.time())
open(jobfeaturesDir + '/jobstart_secs', 'w').write(str(jobstart_secs))
# Limits that the qstat parsing and the mjf override files below may fill in.
# None marks a value that has not been determined. The key names here must
# be spelled exactly as in those later lookups ('cpu_limit_secs', singular).
jobfeatures['wall_limit_secs'] = None
jobfeatures['cpu_limit_secs'] = None
jobfeatures['max_rss_bytes'] = None
jobfeatures['max_swap_bytes'] = None
try:
qstatResult = os.popen('qstat -f ' + jobID,'r').read()
qstatResult = os.popen('qstat -f ' + job_id,'r').read()
except:
# Ok if not possible (qstat not installed? Not allowed to use it?)
pass
else:
try:
wallMatchObject = re.search(' *Resource_List.walltime = ([0-9]*):([0-9]*):([0-9]*)', qstatResult)
jobfeatures['wall_limit_secs'] = int(wallMatchObject.group(1)) * 3600 + int(wallMatchObject.group(2)) * 60 + int(wallMatchObject.group(3))
except:
pass
jobstart_secs = int(time.time())
open(jobfeaturesDir + '/jobstart_secs', 'w').write(str(jobstart_secs))
try:
cpuMatchObject = re.search(' *Resource_List.cput = ([0-9]*):([0-9]*):([0-9]*)', qstatResult)
jobfeatures['cpu_limit_secs'] = int(cpuMatchObject.group(1)) * 3600 + int(cpuMatchObject.group(2)) * 60 + int(cpuMatchObject.group(3))
except:
pass
# Memory limit: pick the number and the lower-case unit suffix out of the
# "Resource_List.mem = <digits><letters>" line of the qstat output and turn
# them into a byte count. Unknown suffixes leave max_rss_bytes untouched.
#
# NOTE(review): the multipliers are decimal (1000-based); Torque may define
# kb/mb/gb as powers of 1024 - confirm which is intended.
# NOTE(review): it is not confirmed that 'b' is the suffix qstat uses for
# plain bytes.
rssBytesPerUnit = {
    'gb': 1000000000,
    'mb': 1000000,
    'kb': 1000,
    'b': 1,
}

try:
    memMatch = re.search(' *Resource_List.mem = ([0-9]*)([a-z]*)', qstatResult)
    memCount = int(memMatch.group(1))
    memSuffix = memMatch.group(2)
except:
    # Best effort: no qstat output, no mem line, or no digits in it
    pass
else:
    if memSuffix in rssBytesPerUnit:
        jobfeatures['max_rss_bytes'] = memCount * rssBytesPerUnit[memSuffix]
# Site-provided values: key=value settings are read from /var/run/mjf and
# from the persistent /etc/sysconfig/mjf. A file that is missing or cannot
# be read is treated as empty.
#
# Values in /var/run/mjf take precedence
try:
    with open('/var/run/mjf', 'r') as mjfFile:
        fromRun = mjfFile.read()
except (IOError, OSError):
    fromRun = ''

# Also look in persistent /etc/sysconfig/mjf
try:
    with open('/etc/sysconfig/mjf', 'r') as mjfFile:
        fromSysconfig = mjfFile.read()
except (IOError, OSError):
    fromSysconfig = ''

# hs06_job is a float so special handling
if 'hs06_job=' in fromRun + fromSysconfig:
    # The pattern names hs06_job literally: the generic loop variable `key`
    # is only bound by the integer loop further down, so it cannot be used
    # to build the pattern at this point.
    #
    # NOTE(review): re.search returns the first match, so with this
    # concatenation order a value in /etc/sysconfig/mjf is found before one
    # in /var/run/mjf - confirm that this is the intended precedence.
    matchObject = re.search("hs06_job=([0-9.]*)", fromSysconfig + fromRun)
    try:
        hs06_job = float(matchObject.group(1))
    except (AttributeError, ValueError):
        # No match, or the text after '=' is not a number: keep the value
        # worked out earlier (if any)
        pass

if hs06_job:
    # Has been defined here or before
    with open(jobfeaturesDir + '/hs06_job', 'w') as featureFile:
        featureFile.write('%.2f' % hs06_job)
# The rest are integers so handle with a list. For each key, a key=<digits>
# setting found in the mjf files replaces whatever was determined earlier;
# the resulting value is then written to a file named after the key in the
# job features directory.
overrideText = fromSysconfig + fromRun
for key in ['allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
            'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes']:

    if key + '=' in overrideText:
        matchObject = re.search(key + "=([0-9]*)", overrideText)
        try:
            jobfeatures[key] = int(matchObject.group(1))
        except (AttributeError, ValueError):
            # No usable digits after '=': keep the earlier value (if any)
            pass

    # Keys that are absent, or still hold the None placeholder, have no
    # known value: write no file for them rather than one containing the
    # text "None".
    if jobfeatures.get(key) is not None:
        with open(jobfeaturesDir + '/' + key, 'w') as featureFile:
            featureFile.write(str(jobfeatures[key]))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment