Commit 548b6c6b authored by Andrew McNab
Browse files

Torque and HTCondor create db12 if enabled

parent 8aeb6bcf
......@@ -35,9 +35,10 @@ INSTALL_FILES=VERSION mjf.init \
prologue.user epilogue.user \
mjf.sh.torque mjf.csh.torque mjf-get-total-cpu.torque \
mjf.sh.htcondor mjf.csh.htcondor mjf-get-total-cpu.htcondor \
mjf-job-wrapper make-jobfeatures.htcondor
mjf-job-wrapper make-jobfeatures.htcondor \
diracBenchmark.py db12.init
TGZ_FILES=$(INSTALL_FILES) Makefile mjf-torque.spec mjf-htcondor.spec README
TGZ_FILES=$(INSTALL_FILES) Makefile mjf-torque.spec mjf-htcondor.spec mjf-db12.spec README
GNUTAR ?= tar
mjf-scripts.tgz: $(TGZ_FILES)
......@@ -95,3 +96,19 @@ htcondor-rpm: mjf-scripts.tgz
--define "_topdir $(shell pwd)/RPMTMP" \
--buildroot $(shell pwd)/RPMTMP/BUILDROOT mjf-htcondor.spec
# Install the DB12 files into the RPM build root: the init script goes to
# /etc/rc.d/init.d/db12 and the benchmark itself to /usr/sbin, which is where
# db12.init expects to find diracBenchmark.py. /etc/db12 is created empty and
# is filled in by the init script when it runs.
db12-install: $(INSTALL_FILES)
	mkdir -p $(RPM_BUILD_ROOT)/etc/rc.d/init.d \
	         $(RPM_BUILD_ROOT)/etc/db12 \
	         $(RPM_BUILD_ROOT)/usr/sbin
	cp db12.init \
	   $(RPM_BUILD_ROOT)/etc/rc.d/init.d/db12
	cp diracBenchmark.py \
	   $(RPM_BUILD_ROOT)/usr/sbin/diracBenchmark.py
# Build the mjf-db12 RPM from mjf-scripts.tgz inside a scratch RPMTMP tree.
# Any existing RPMTMP directory is removed first so each build starts clean.
# MJF_VERSION is exported into the environment of rpmbuild so that the spec
# file can read the package version from it.
db12-rpm: mjf-scripts.tgz
	rm -Rf RPMTMP
	mkdir -p RPMTMP/SOURCES RPMTMP/SPECS RPMTMP/BUILD \
	         RPMTMP/SRPMS RPMTMP/RPMS/noarch RPMTMP/BUILDROOT
	cp -f mjf-scripts.tgz RPMTMP/SOURCES
	export MJF_VERSION=$(VERSION) ; rpmbuild -ba \
	  --define "_topdir $(shell pwd)/RPMTMP" \
	  --buildroot $(shell pwd)/RPMTMP/BUILDROOT mjf-db12.spec
\ No newline at end of file
#!/bin/bash
#
# db12 Run DB12 fast benchmark and create /etc/db12 files
#
# chkconfig: 345 10 90
# description: Run DB12 fast benchmark and create /etc/db12 files
# Source function library.
. /etc/init.d/functions
# Work out how many processors the benchmark should occupy.
#
# If /etc/db12/total_cpu is already created (in Kickstart?), we use that:
#
if [ -f /etc/db12/total_cpu ] ; then
  # Plain assignment, no leading $: "$total_cpu=..." expands the variable and
  # tries to run the result as a command, so the file's value was never used
  total_cpu=`cat /etc/db12/total_cpu`
fi

if [ "$total_cpu" == "" ] ; then
  # If total_cpu is not given in /etc/db12/total_cpu then count OS processors
  total_cpu=`grep '^processor[[:space:]]*:' /proc/cpuinfo | wc --lines`
  mkdir -p /etc/db12
  # Double quotes so that the number is stored, not the literal text $total_cpu
  echo "$total_cpu" > /etc/db12/total_cpu
fi
# Run the DB12 benchmark on $total_cpu processors and store the summed score
# in /etc/db12/db12.
#
# Exits with status 1 if not run as root. Returns 0 after writing the score,
# or 1 (reporting failure) if the benchmark produced no score, in which case
# any existing /etc/db12/db12 is left untouched.
start() {
  [ "$EUID" != "0" ] && exit 1

  echo -n $"Run DB12 fast benchmark: "

  # With a numeric argument diracBenchmark.py prints "mean sum [raw values]"
  # separated by spaces, so the second field is the sum over all processors
  db12_sum=`/usr/sbin/diracBenchmark.py $total_cpu | cut -f2 -d' '`

  if [ "$db12_sum" == "" ] ; then
    # Do not claim success when there is nothing to record
    echo_failure
    echo
    return 1
  fi

  echo "$db12_sum" > /etc/db12/db12
  echo_success
  echo
  return 0
}
# Placeholder stop action: it changes nothing and only reports success.
# Exits with status 3 if not run as root.
stop() {
  if [ "$EUID" != "0" ] ; then
    exit 3
  fi

  echo -n $"Dummy DB12 stop action: "
  echo_success
  echo

  return 0
}
# Dispatch on the requested init action. restart and both reload variants are
# a stop followed by a start; anything else prints the usage and exits 4.
case "$1" in
  start)  start ;;
  stop)   stop ;;
  reload|force-reload|restart)
          stop
          start
          ;;
  *)
          echo $"Usage: $0 {start|stop|restart|force-reload|reload}"
          exit 4
          ;;
esac
#!/usr/bin/python
########################################################################
# File : diracBenchmark.py
# Author : Andrew McNab
########################################################################
""" DIRAC Benchmark 2012 by Ricardo Graciani, and wrapper functions to
run multiple instances in parallel
"""
import os
import sys
import random
import urllib
import multiprocessing
version = '0.1 DB12'
def singleDiracBenchmark( iterations = 1, resultObject = None ):
  """ Get Normalized Power of one CPU in DIRAC Benchmark 2012 units (DB12)

      Runs iterations + 1 passes of a fixed arithmetic workload, times every
      pass except the first, and converts the CPU time used into a score.

      iterations   -- number of timed passes of the workload
      resultObject -- optional object whose .value attribute is set to the
                      score, so that a parent process can collect it

      Returns None if no CPU time was measured, otherwise a dictionary with
      the keys 'CPU', 'WALL', 'NORM' (the score) and 'UNIT'.
  """

  # This number of iterations corresponds to 1kHS2k.seconds, i.e. 250 HS06 seconds
  n = int( 1000 * 1000 * 12.5 )
  calib = 250.0

  # Accumulators for the workload; their final values are never read, the
  # arithmetic exists only to consume CPU time
  m = long( 0 )
  m2 = long( 0 )
  p = 0
  p2 = 0

  # Do one iteration extra to allow CPUs with variable speed (we ignore zeroth iteration)
  # NOTE(review): with iterations = 0 the i == 1 branch never runs, so start is
  # never assigned and the cput calculation below fails - confirm callers always pass >= 1
  for i in range( iterations + 1 ):
    if i == 1:
      # Timing begins only once the untimed zeroth iteration has finished
      start = os.times()

    # Now the iterations
    for _j in xrange( n ):
      t = random.normalvariate( 10, 1 )
      m += t
      m2 += t * t
      p += t
      p2 += t * t

  end = os.times()
  # CPU time is the sum of the first four os.times() fields (user and system
  # time of this process and of its children); the fifth is elapsed real time
  cput = sum( end[:4] ) - sum( start[:4] )
  wall = end[4] - start[4]

  if not cput:
    # Avoid dividing by zero when no CPU time was recorded
    return None
  else:
    if resultObject is not None:
      # This makes it easy to use with multiprocessing.Process
      resultObject.value = calib * iterations / cput

    # Return DIRAC-compatible values too
    return { 'CPU' : cput, 'WALL' : wall, 'NORM' : calib * iterations / cput, 'UNIT' : 'DB12' }
def multipleDiracBenchmark( instances = 1, iterations = 1 ):
  """ Run several copies of singleDiracBenchmark() at once, one subprocess each

      instances  -- how many subprocesses to run side by side
      iterations -- timed passes per subprocess, handed to singleDiracBenchmark()

      Returns a dictionary with the per-subprocess scores under 'raw', and
      their total and average under 'sum' and 'mean'.
  """

  # One shared double per subprocess, used to carry its score back to us
  scores = [ multiprocessing.Value( 'd', 0.0 ) for _ in range( instances ) ]

  workers = [ multiprocessing.Process( target = singleDiracBenchmark, args = ( iterations, score ) )
              for score in scores ]

  # Launch every subprocess before waiting on any, so that they overlap
  for worker in workers:
    worker.start()

  for worker in workers:
    worker.join()

  raw = [ score.value for score in scores ]
  total = sum( raw )

  return { 'raw' : raw, 'sum' : total, 'mean' : total / len( raw ) }
def wholenodeDiracBenchmark( instances = None, iterations = 1 ):
  """ Run as many instances as needed to occupy the whole machine

      instances  -- number of parallel copies to run; if not given it is read
                    from $MACHINEFEATURES/total_cpu, failing that the processor
                    count reported by multiprocessing, failing that 1
      iterations -- timed passes per copy, passed through unchanged

      Returns the dictionary produced by multipleDiracBenchmark().
  """

  # Try $MACHINEFEATURES first if not given by caller
  if not instances and 'MACHINEFEATURES' in os.environ:
    try:
      # Use urllib, which this file imports; urllib2 is not imported, so the
      # previous call always failed and this source was silently skipped.
      # urllib.urlopen() also opens a plain local path when there is no URL scheme.
      instances = int( urllib.urlopen( os.environ['MACHINEFEATURES'] + '/total_cpu' ).read() )
    except Exception:
      # Best effort only: fall through to counting CPUs
      pass

  # If not given by caller or $MACHINEFEATURES/total_cpu then just count CPUs
  if not instances:
    try:
      instances = multiprocessing.cpu_count()
    except Exception:
      instances = 1

  return multipleDiracBenchmark( instances = instances, iterations = iterations )
def jobslotDiracBenchmark( instances = None, iterations = 1 ):
  """ Run as many instances as needed to occupy the job slot

      instances  -- number of parallel copies to run; if not given it is read
                    from $JOBFEATURES/allocated_cpu, failing that 1
      iterations -- timed passes per copy, passed through unchanged

      Returns the dictionary produced by multipleDiracBenchmark().
  """

  # Try $JOBFEATURES first if not given by caller
  if not instances and 'JOBFEATURES' in os.environ:
    try:
      # Use urllib, which this file imports; urllib2 is not imported, so the
      # previous call always failed and this source was silently skipped.
      # urllib.urlopen() also opens a plain local path when there is no URL scheme.
      instances = int( urllib.urlopen( os.environ['JOBFEATURES'] + '/allocated_cpu' ).read() )
    except Exception:
      # Best effort only: fall through to a single instance
      pass

  # If not given by caller or $JOBFEATURES/allocated_cpu then just run one instance
  if not instances:
    instances = 1

  return multipleDiracBenchmark( instances = instances, iterations = iterations )
#
# If we run as a command
#
# Usage: diracBenchmark.py [single|version|wholenode|jobslot|<number of instances>]
#
# With no argument, or 'single', one benchmark is run and its score printed.
# 'version' prints the version string. Otherwise several instances are run in
# parallel and "mean sum [raw values]" is printed on one line, separated by
# spaces. The exit status is 1 if the argument is not recognised or no score
# could be measured.
#
if __name__ == "__main__":

  if len( sys.argv ) == 1 or sys.argv[1] == 'single':
    result = singleDiracBenchmark()
    if result is None:
      # No CPU time was measured, so there is no score to print
      sys.exit( 1 )
    # The score is stored under 'NORM'; the single-instance result has no 'sum' key
    print( result['NORM'] )
    sys.exit( 0 )

  if sys.argv[1] == 'version':
    print( version )
    sys.exit( 0 )

  if sys.argv[1] == 'wholenode':
    result = wholenodeDiracBenchmark()
  elif sys.argv[1] == 'jobslot':
    result = jobslotDiracBenchmark()
  else:
    try:
      instances = int( sys.argv[1] )
    except ValueError:
      sys.exit( 1 )
    result = multipleDiracBenchmark( instances = instances )

  # Keep the three values separated by single spaces: callers pick fields out by position
  print( '%s %s %s' % ( result['mean'], result['sum'], result['raw'] ) )
  sys.exit( 0 )
......@@ -50,6 +50,7 @@
# - max_swap_bytes
# - scratch_limit_bytes
# - hs06_job
# - db12_job
#
# Values in /var/run/mjf are preferred over /etc/sysconfig/mjf.
#
......@@ -233,6 +234,35 @@ if hs06_job:
# We got it from somewhere
open(jobfeaturesDir + '/hs06_job', 'w').write('%.2f' % hs06_job)
# Work out db12_job: an explicitly configured value wins, otherwise it is
# derived from the machine-wide DB12 figure
db12_job = None

if 'db12_job=' in fromRun + fromSysconfig:
  # Take the first db12_job= setting found, looking in fromRun before fromSysconfig
  try:
    db12_job = float(re.search("db12_job=([0-9.]*)", fromRun + '\n' + fromSysconfig).group(1))
  except:
    pass

if not db12_job:
  # Nothing usable was configured, so read the whole-machine values instead
  try:
    db12 = float(open('/etc/machinefeatures/db12','r').readline())
  except:
    db12 = None

  try:
    total_cpu = int(open('/etc/machinefeatures/total_cpu','r').readline())
  except:
    total_cpu = None

  if db12 and total_cpu:
    # This job's share of the machine total, in proportion to its processors
    db12_job = (jobfeatures['allocated_cpu'] * db12) / total_cpu

if db12_job:
  # Only publish the key when a value was actually obtained
  open(jobfeaturesDir + '/db12_job', 'w').write('%.2f' % db12_job)
# We output $JOBFEATURES in case the calling script needs it
print jobfeaturesDir
Name: mjf-db12
Version: %(echo ${MJF_VERSION:-0.0})
Release: 1
BuildArch: noarch
Summary: DIRAC Benchmark (DB12) for Machine/Job Features
License: BSD
Group: System Environment/Daemons
Source: mjf-scripts.tgz
Vendor: GridPP
Packager: Andrew McNab <Andrew.McNab@cern.ch>
%description
Run at boot time to produce DIRAC Benchmark (DB12) and write to /etc/db12 ready for MJF
%prep
# Unpack the source tarball and change into its mjf-scripts directory
%setup -n mjf-scripts

%build
# Nothing to build: this is a noarch package

%install
# The db12-install Makefile target copies the files into the build root
make db12-install

%post
# Enable the db12 init script
chkconfig db12 on
%preun
if [ "$1" = "0" ] ; then
  # Uninstallation rather than upgrade, so stop db12 being run at boot.
  # This package owns the db12 init script, not mjf, so db12 is the service to disable.
  chkconfig db12 off
fi
%files
/etc/db12
/etc/rc.d/init.d/*
/usr/sbin/*
......@@ -15,6 +15,7 @@
# - hs06
# - shutdowntime
# - grace_secs
# - db12
#
# Persistent configuration goes in sysconfig
......@@ -43,6 +44,11 @@ start() {
total_cpu=`/usr/sbin/mjf-get-total-cpu`
fi
# If still not explicitly configured then get it from /etc/db12 if available
if [ "$total_cpu" == "" -a -f /etc/db12/total_cpu ] ; then
total_cpu=`cat /etc/db12/total_cpu`
fi
# If still not explicitly configured then get it from the OS
if [ "$total_cpu" == "" ] ; then
total_cpu=`grep '^processor[[:space:]]*:' /proc/cpuinfo | wc --lines`
......@@ -54,6 +60,15 @@ start() {
echo -n "$hs06" > /etc/machinefeatures.tmp/hs06
fi
# If not explicitly configured then get from /etc/db12
if [ "$db12" == "" -a -f /etc/db12/db12 ] ; then
db12=`cat /etc/db12/db12`
fi
if [ "$db12" != "" ] ; then
echo -n "$db12" > /etc/machinefeatures.tmp/db12
fi
if [ "$shutdowntime" != "" ] ; then
echo -n "$shutdowntime" > /etc/machinefeatures.tmp/shutdowntime
fi
......
......@@ -50,6 +50,7 @@
# - max_swap_bytes
# - scratch_limit_bytes
# - hs06_job
# - db12_job
#
# Values in /var/run/mjf are preferred over /etc/sysconfig/mjf.
#
......@@ -218,3 +219,32 @@ if hs06_job:
# We got it from somewhere
open(jobfeaturesDir + '/hs06_job', 'w').write('%.2f' % hs06_job)
# Work out db12_job: an explicitly configured value wins, otherwise it is
# derived from the machine-wide DB12 figure
db12_job = None

if 'db12_job=' in fromRun + fromSysconfig:
  # Take the first db12_job= setting found, looking in fromRun before fromSysconfig
  try:
    db12_job = float(re.search("db12_job=([0-9.]*)", fromRun + '\n' + fromSysconfig).group(1))
  except:
    pass

if not db12_job:
  # Nothing usable was configured, so read the whole-machine values instead
  try:
    db12 = float(open('/etc/machinefeatures/db12','r').readline())
  except:
    db12 = None

  try:
    total_cpu = int(open('/etc/machinefeatures/total_cpu','r').readline())
  except:
    total_cpu = None

  if db12 and total_cpu:
    # This job's share of the machine total, in proportion to its processors
    db12_job = (jobfeatures['allocated_cpu'] * db12) / total_cpu

if db12_job:
  # Only publish the key when a value was actually obtained
  open(jobfeaturesDir + '/db12_job', 'w').write('%.2f' % db12_job)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment